Skip to content

Commit

Permalink
Merged with upstream changes from develop
Browse files Browse the repository at this point in the history
  • Loading branch information
drreynolds committed Oct 27, 2023
2 parents 7472714 + b84b330 commit 8fddce4
Show file tree
Hide file tree
Showing 28 changed files with 1,359 additions and 518 deletions.
1 change: 0 additions & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ SpacesInConditionalStatement : false
SpacesInContainerLiterals : true
SpacesInParentheses : false
SpacesInSquareBrackets : false
SpaceBeforeSquareBrackets : false
Standard : c++14
TabWidth: 2
UseCRLF : false
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ Fixed scaling bug in `SUNMatScaleAddI_Sparse` for non-square matrices.
Fixed missing soversions in some `SUNLinearSolver` and `SUNNonlinearSolver`
CMake targets.

Fixed the build system support for MAGMA when using an NVIDIA HPC SDK installation of CUDA
and fixed the targets used for rocBLAS and rocSPARSE.

Added the third order ERK method `ARKODE_SHU_OSHER_3_2_3`, the fourth order
ERK method `ARKODE_SOFRONIOU_SPALETTA_5_3_4`, the sixth order ERK method
`ARKODE_VERNER_9_5_6`, the seventh order ERK method `ARKODE_VERNER_10_6_7`,
Expand Down
2 changes: 2 additions & 0 deletions cmake/macros/SundialsAddExamplesGinkgo.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ macro(sundials_add_examples_ginkgo EXAMPLES_VAR)
elseif(backend MATCHES "HIP")
set_source_files_properties(${example} PROPERTIES LANGUAGE CXX)
set(vector nvechip)
elseif(backend MATCHES "DPCPP")
set(vector nvecsycl)
elseif(backend MATCHES "OMP")
set(vector nvecopenmp)
elseif(backend MATCHES "REF")
Expand Down
42 changes: 30 additions & 12 deletions cmake/tpl/FindMAGMA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -66,24 +66,42 @@ if(MAGMA_LIBRARY AND MAGMA_INCLUDE_DIR)
list(SUBLIST _libraries_list 1 -1 _libraries_list) # remove 'Libs:' part

set(_interface_libraires )

if(SUNDIALS_MAGMA_BACKENDS MATCHES "HIP")
if(NOT TARGET roc::hipblas)
find_package(hipblas REQUIRED)
endif()
if(NOT TARGET roc::hipsparse)
find_package(hipsparse REQUIRED)
endif()
# MAGMA does not reliably include these in the pkgconfig file
list(APPEND _interface_libraires "roc::hipblas;roc::hipsparse")
endif()

if(SUNDIALS_MAGMA_BACKENDS MATCHES "CUDA")
if (NOT TARGET CUDA::cudart)
find_package(CUDAToolkit REQUIRED)
endif()
endif()

foreach(lib ${_libraries_list})
if(NOT (lib STREQUAL "-lmagma" OR lib STREQUAL "-lmagma_sparse"
OR lib STREQUAL "-L\${libdir}" OR lib STREQUAL "") )

# Remove -l only from the beginning of the string
string(REPLACE "^-l" "" lib ${lib})
list(APPEND _interface_libraires ${lib})

# Check if we need to find roc::hipblas or roc::hipsparse
if(SUNDIALS_MAGMA_BACKENDS MATCHES "HIP")
if((lib STREQUAL "roc::hipblas") AND (NOT TARGET roc::hipblas))
find_package(hipblas REQUIRED)

# Check if we need to find cusparse or cublas
if(SUNDIALS_MAGMA_BACKENDS MATCHES "CUDA")
# Replace cublas, cusparse with the CMake targets because the library path in
# the magma pkgconfig is not reliable. Specifically, the path is wrong on systems
# like Perlmutter where the NVIDIA HPC SDK is used.
if(lib STREQUAL "-lcublas")
set(lib CUDA::cublas)
endif()
if((lib STREQUAL "roc::hipsparse") AND (NOT TARGET roc::hipsparse))
find_package(hipsparse REQUIRED)
if(lib STREQUAL "-lcusparse")
set(lib CUDA::cusparse)
endif()
endif()


list(APPEND _interface_libraires ${lib})
endif()
endforeach()

Expand Down
6 changes: 6 additions & 0 deletions doc/arkode/guide/source/Introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ Changes from previous versions
Changes in vX.X.X
-----------------

Fixed the build system support for MAGMA when using an NVIDIA HPC SDK installation of CUDA
and fixed the targets used for rocBLAS and rocSPARSE.

Fixed a regression introduced by the stop time bug fix in v6.6.1 where ARKODE
steppers would return at the stop time rather than the requested output time if
the stop time was reached in the same step in which the output time was passed.
Expand All @@ -153,6 +156,9 @@ Fixed scaling bug in ``SUNMatScaleAddI_Sparse`` for non-square matrices.
Fixed missing soversions in some ``SUNLinearSolver`` and ``SUNNonlinearSolver``
CMake targets.

Fixed the build system support for MAGMA when using an NVIDIA HPC SDK installation of CUDA
and fixed the targets used for rocBLAS and rocSPARSE.

Added the third order ERK method ``ARKODE_SHU_OSHER_3_2_3``, the fourth order
ERK method ``ARKODE_SOFRONIOU_SPALETTA_5_3_4``, the sixth order ERK method
``ARKODE_VERNER_9_5_6``, the seventh order ERK method ``ARKODE_VERNER_10_6_7``,
Expand Down
3 changes: 3 additions & 0 deletions doc/cvode/guide/source/Introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ Changes from previous versions
Changes in vX.X.X
-----------------

Fixed the build system support for MAGMA when using an NVIDIA HPC SDK installation of CUDA
and fixed the targets used for rocBLAS and rocSPARSE.

Fixed a regression introduced by the stop time bug fix in v6.6.1 where CVODE
would return at the stop time rather than the requested output time if the stop
time was reached in the same step in which the output time was passed.
Expand Down
3 changes: 3 additions & 0 deletions doc/cvodes/guide/source/Introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ Changes from previous versions
Changes in vX.X.X
-----------------

Fixed the build system support for MAGMA when using an NVIDIA HPC SDK installation of CUDA
and fixed the targets used for rocBLAS and rocSPARSE.

Fixed a regression introduced by the stop time bug fix in v6.6.1 where CVODES
would return at the stop time rather than the requested output time if the stop
time was reached in the same step in which the output time was passed.
Expand Down
3 changes: 3 additions & 0 deletions doc/ida/guide/source/Introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ Changes from previous versions
Changes in vX.X.X
-----------------

Fixed the build system support for MAGMA when using an NVIDIA HPC SDK installation of CUDA
and fixed the targets used for rocBLAS and rocSPARSE.

Fixed a regression introduced by the stop time bug fix in v6.6.1 where IDA would
return at the stop time rather than the requested output time if the stop time
was reached in the same step in which the output time was passed.
Expand Down
3 changes: 3 additions & 0 deletions doc/idas/guide/source/Introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ Changes from previous versions
Changes in vX.X.X
-----------------

Fixed the build system support for MAGMA when using an NVIDIA HPC SDK installation of CUDA
and fixed the targets used for rocBLAS and rocSPARSE.

Fixed a regression introduced by the stop time bug fix in v6.6.1 where IDAS
would return at the stop time rather than the requested output time if the stop
time was reached in the same step in which the output time was passed.
Expand Down
3 changes: 3 additions & 0 deletions doc/kinsol/guide/source/Introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ Changes from previous versions
Changes in vX.X.X
-----------------

Fixed the build system support for MAGMA when using an NVIDIA HPC SDK installation of CUDA
and fixed the targets used for rocBLAS and rocSPARSE.

Improved computational complexity of ``SUNMatScaleAddI_Sparse`` from ``O(M*N)`` to
``O(NNZ)``.

Expand Down
5 changes: 4 additions & 1 deletion examples/cvode/ginkgo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ set(cpu_gpu_examples

sundials_add_examples_ginkgo(cpu_gpu_examples
TARGETS sundials_cvode
BACKENDS REF OMP CUDA HIP)
BACKENDS REF OMP CUDA HIP DPCPP)

# Examples that only support CPU Ginkgo backends
set(cpu_examples
Expand All @@ -39,6 +39,9 @@ if(EXAMPLES_INSTALL)
if(SUNDIALS_GINKGO_BACKENDS MATCHES "HIP")
list(APPEND vectors nvechip)
endif()
if(SUNDIALS_GINKGO_BACKENDS MATCHES "DPCPP")
list(APPEND vectors nvecsycl)
endif()
if((SUNDIALS_GINKGO_BACKENDS MATCHES "OMP") OR
(SUNDIALS_GINKGO_BACKENDS MATCHES "REF"))
list(APPEND vectors nvecserial)
Expand Down
77 changes: 77 additions & 0 deletions examples/cvode/ginkgo/cv_heat2D_ginkgo.DPCPP.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

2D Heat problem:
----------------------------
kx = 1
ky = 1
tf = 1
xu = 1
yu = 1
nx = 64
ny = 64
dx = 0.015873
dy = 0.015873
----------------------------
rtol = 0.0001
atol = 1e-08
----------------------------
lin iters = 20
eps lin = 0
----------------------------
output = 0
----------------------------

t ||u||_rms max error
-----------------------------------------------------------------------
0.000000000000000e+00 1.273091462283009e+00 0.000000000000000e+00
5.000000000000000e-02 1.265953031236337e+00 5.779434661301597e-04
1.000000000000000e-01 1.245126467995815e+00 8.596410825743028e-04
1.500000000000000e-01 1.212971698816507e+00 1.027071183737238e-03
2.000000000000000e-01 1.173149707911348e+00 1.049506292939650e-03
2.500000000000000e-01 1.129970993609124e+00 7.767258516966358e-04
3.000000000000000e-01 1.088067923761304e+00 3.857233565973672e-04
3.500000000000000e-01 1.051569245238569e+00 2.296842605538085e-04
4.000000000000000e-01 1.023519508142414e+00 1.160865021105906e-04
4.500000000000000e-01 1.005965995289331e+00 3.382124480899584e-05
4.999999999999999e-01 9.999934385586851e-01 6.776957530241212e-05
5.499999999999999e-01 1.005920028619227e+00 1.074298825753939e-04
6.000000000000000e-01 1.023439646225216e+00 1.256532195708093e-04
6.500000000000000e-01 1.051474380012092e+00 4.493207354094864e-05
7.000000000000001e-01 1.087965937316374e+00 8.048432853913212e-05
7.500000000000001e-01 1.129792873621271e+00 2.390181284921411e-04
8.000000000000002e-01 1.172918427971992e+00 4.322892993839922e-04
8.500000000000002e-01 1.212840417005807e+00 6.887143222911174e-04
9.000000000000002e-01 1.245177171528506e+00 9.911108446238881e-04
9.500000000000003e-01 1.266240637720725e+00 1.309517693130591e-03
1.000000000000000e+00 1.273471195699113e+00 1.189685923645323e-03
-----------------------------------------------------------------------

Final integrator statistics:
Current time = 1
Steps = 41
Error test fails = 0
NLS step fails = 0
Initial step size = 0.002110117778857815
Last step size = 0.02437232616233551
Current step size = 0.02437232616233551
Last method order = 3
Current method order = 3
Stab. lim. order reductions = 0
RHS fn evals = 52
NLS iters = 49
NLS fails = 0
NLS iters per step = 1.195121951219512
LS setups = 7
Jac fn evals = 1
LS RHS fn evals = 0
Prec setup evals = 0
Prec solves = 0
LS iters = 873
LS fails = 0
Jac-times setups = 0
Jac-times evals = 0
LS iters per NLS iter = 17.81632653061224
Jac evals per NLS iter = 0.02040816326530612
Prec evals per NLS iter = 0
Root fn evals = 0

Max error = 1.189685923645323e-03
77 changes: 77 additions & 0 deletions examples/cvode/ginkgo/cv_heat2D_ginkgo.HIP.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@

2D Heat problem:
----------------------------
kx = 1
ky = 1
tf = 1
xu = 1
yu = 1
nx = 64
ny = 64
dx = 0.015873
dy = 0.015873
----------------------------
rtol = 0.0001
atol = 1e-08
----------------------------
lin iters = 20
eps lin = 0
----------------------------
output = 0
----------------------------

t ||u||_rms max error
-----------------------------------------------------------------------
0.000000000000000e+00 1.273091462283009e+00 0.000000000000000e+00
5.000000000000000e-02 1.265953031236678e+00 5.779434664550109e-04
1.000000000000000e-01 1.245126468025294e+00 8.596397006188639e-04
1.500000000000000e-01 1.212971692633635e+00 1.027175433592653e-03
2.000000000000000e-01 1.173149607363054e+00 1.048511182224932e-03
2.500000000000000e-01 1.129971118809724e+00 7.777031646749588e-04
3.000000000000000e-01 1.088068652479702e+00 3.867786296469777e-04
3.500000000000000e-01 1.051569453796381e+00 2.291655197173004e-04
4.000000000000000e-01 1.023519691519509e+00 1.152507676536185e-04
4.500000000000000e-01 1.005966466128100e+00 3.464712602863074e-05
4.999999999999999e-01 9.999941074735683e-01 6.549023902890916e-05
5.499999999999999e-01 1.005920139252285e+00 1.085862394125670e-04
6.000000000000000e-01 1.023440066617863e+00 1.253667245226797e-04
6.500000000000000e-01 1.051474489311814e+00 4.368064039983466e-05
7.000000000000001e-01 1.087966430721224e+00 8.251704806849780e-05
7.500000000000001e-01 1.129793211633907e+00 2.403035068856418e-04
8.000000000000002e-01 1.172918720617323e+00 4.322501964297842e-04
8.500000000000002e-01 1.212839862652567e+00 6.883283219258907e-04
9.000000000000002e-01 1.245175128903723e+00 9.857170108882318e-04
9.500000000000003e-01 1.266235911062742e+00 1.301833676780495e-03
1.000000000000000e+00 1.273469281873646e+00 1.183015268845011e-03
-----------------------------------------------------------------------

Final integrator statistics:
Current time = 1
Steps = 41
Error test fails = 0
NLS step fails = 0
Initial step size = 0.002110117764420172
Last step size = 0.02782878040979117
Current step size = 0.02782878040979117
Last method order = 3
Current method order = 3
Stab. lim. order reductions = 0
RHS fn evals = 52
NLS iters = 49
NLS fails = 0
NLS iters per step = 1.195121951219512
LS setups = 7
Jac fn evals = 1
LS RHS fn evals = 0
Prec setup evals = 0
Prec solves = 0
LS iters = 875
LS fails = 0
Jac-times setups = 0
Jac-times evals = 0
LS iters per NLS iter = 17.85714285714286
Jac evals per NLS iter = 0.02040816326530612
Prec evals per NLS iter = 0
Root fn evals = 0

Max error = 1.183015268845011e-03
Loading

0 comments on commit 8fddce4

Please sign in to comment.