diff --git a/KomaMRICore/ext/KomaAMDGPUExt.jl b/KomaMRICore/ext/KomaAMDGPUExt.jl index 08f08f96c..112dc2285 100644 --- a/KomaMRICore/ext/KomaAMDGPUExt.jl +++ b/KomaMRICore/ext/KomaAMDGPUExt.jl @@ -18,7 +18,7 @@ end function KomaMRICore._print_devices(::ROCBackend) devices = [ - Symbol("($(i-1)$(i == 1 ? "*" : " "))") => d.name for + Symbol("($(i-1)$(i == 1 ? "*" : " "))") => AMDGPU.HIP.name(d) for (i, d) in enumerate(AMDGPU.devices()) ] @info "$(length(AMDGPU.devices())) AMD capable device(s)." devices... diff --git a/KomaMRICore/ext/KomaMetalExt.jl b/KomaMRICore/ext/KomaMetalExt.jl index 3d522e0b8..bc452f31e 100644 --- a/KomaMRICore/ext/KomaMetalExt.jl +++ b/KomaMRICore/ext/KomaMetalExt.jl @@ -1,3 +1,5 @@ +## COV_EXCL_START + module KomaMetalExt using Metal @@ -32,4 +34,6 @@ function __init__() @warn "Metal does not support all array operations used by KomaMRI (https://github.com/JuliaGPU/Metal.jl/issues/348). GPU performance may be slower than expected" end -end \ No newline at end of file +end + +## COV_EXCL_STOP \ No newline at end of file diff --git a/KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl b/KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl index c9c89549c..3cf7abb6c 100644 --- a/KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl +++ b/KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl @@ -35,6 +35,7 @@ function run_spin_precession!( sig::AbstractArray{Complex{T}}, M::Mag{T}, sim_method::BlochDict, + backend::KA.Backend ) where {T<:Real} #Simulation #Motion @@ -43,7 +44,7 @@ function run_spin_precession!( Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ) #Rotation if is_ADC_on(seq) - ϕ = T(-2π * γ) .* cumtrapz(seq.Δt', Bz) + ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend) else ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz) end diff --git a/KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl b/KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl index 284da0c13..f60a8b278 100644 --- a/KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl +++ b/KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl @@ -44,6 +44,7 @@ function run_spin_precession!( sig::AbstractArray{Complex{T}}, M::Mag{T}, sim_method::SimulationMethod, + backend::KA.Backend ) where {T<:Real} #Simulation #Motion @@ -52,7 +53,7 @@ function run_spin_precession!( Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ) #Rotation if is_ADC_on(seq) - ϕ = T(-2π * γ) .* cumtrapz(seq.Δt', Bz) + ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend) else ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz) end diff --git a/KomaMRICore/src/simulation/Bloch/KernelFunctions.jl b/KomaMRICore/src/simulation/Bloch/KernelFunctions.jl new file mode 100644 index 000000000..15728bcb6 --- /dev/null +++ b/KomaMRICore/src/simulation/Bloch/KernelFunctions.jl @@ -0,0 +1,50 @@ +using KernelAbstractions: @index, @kernel + +## COV_EXCL_START +#! format: off + +""" + cumsum2_kernel + +Simple kernel function, computes the cumulative sum of each row of a matrix. Operates +in-place on the input matrix without allocating additional memory. + +# Arguments +- 'A': matrix to compute cumsum on +""" +@kernel function cumsum_matrix_rows_kernel!(A) + i = @index(Global) + + for k ∈ 2:size(A, 2) + @inbounds A[i, k] += A[i, k-1] + end +end + +#! format: on +## COV_EXCL_STOP + +""" + cumtrapz + +A more efficient GPU implementation of the cumtrapz method defined in TrapezoidalIntegration.jl. +Uses a kernel to compute cumsum along the second dimension. + +# Arguments +- `Δt`: (`1 x NΔt ::Matrix{Float64}`, `[s]`) delta time 1-row array +- `x`: (`Ns x (NΔt+1) ::Matrix{Float64}`, `[T]`) magnitude of the field Gx * x + Gy * y + + Gz * z + +# Returns +- `y`: (`Ns x NΔt ::Matrix{Float64}`, `[T*s]`) matrix where every column is the + cumulative integration over time of (Gx * x + Gy * y + Gz * z) * Δt for every spin of a + phantom +""" +function KomaMRIBase.cumtrapz(Δt::AbstractArray{T}, x::AbstractArray{T}, backend::KA.GPU) where {T<:Real} + y = (x[:, 2:end] .+ x[:, 1:end-1]) .* (Δt / 2) + cumsum_matrix_rows_kernel!(backend)(y, ndrange=size(y,1)) + KA.synchronize(backend) + return y +end + +#If on CPU, forward call to cumtrapz in KomaMRIBase +KomaMRIBase.cumtrapz(Δt::AbstractArray{T}, x::AbstractArray{T}, backend::KA.CPU) where {T<:Real} = KomaMRIBase.cumtrapz(Δt, x) \ No newline at end of file diff --git a/KomaMRICore/src/simulation/Functors.jl b/KomaMRICore/src/simulation/Functors.jl index 92038efe5..4aac6628f 100644 --- a/KomaMRICore/src/simulation/Functors.jl +++ b/KomaMRICore/src/simulation/Functors.jl @@ -22,7 +22,7 @@ See also [`f32`](@ref) and [`f64`](@ref) to change element type only. # Examples ```julia using CUDA -x = gpu(x) +x = x |> gpu ``` """ function gpu(x) @@ -31,7 +31,8 @@ function gpu(x) if (BACKEND[] isa KA.GPU) return gpu(x, BACKEND[]) else - @error "function 'gpu' called with no functional backends available. Add 'using CUDA / Metal / AMDGPU / oneAPI' to your code and try again" + @warn "function 'gpu' called with no functional backends available. Add 'using CUDA / Metal / AMDGPU / oneAPI' to your code and try again" + return x end end diff --git a/KomaMRICore/src/simulation/SimulatorCore.jl b/KomaMRICore/src/simulation/SimulatorCore.jl index d52797a33..fabb828d1 100644 --- a/KomaMRICore/src/simulation/SimulatorCore.jl +++ b/KomaMRICore/src/simulation/SimulatorCore.jl @@ -2,6 +2,7 @@ abstract type SimulationMethod end #get all available types by using subtypes(Ko abstract type SpinStateRepresentation{T<:Real} end #get all available types by using subtypes(KomaMRI.SpinStateRepresentation) #Defined methods: +include("Bloch/KernelFunctions.jl") include("Bloch/BlochSimulationMethod.jl") #Defines Bloch simulation method include("Bloch/BlochDictSimulationMethod.jl") #Defines BlochDict simulation method @@ -82,7 +83,8 @@ function run_spin_precession_parallel!( seq::DiscreteSequence{T}, sig::AbstractArray{Complex{T}}, Xt::SpinStateRepresentation{T}, - sim_method::SimulationMethod; + sim_method::SimulationMethod, + backend::KA.Backend; Nthreads=Threads.nthreads(), ) where {T<:Real} parts = kfoldperm(length(obj), Nthreads) @@ -90,7 +92,7 @@ function run_spin_precession_parallel!( ThreadsX.foreach(enumerate(parts)) do (i, p) run_spin_precession!( - @view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method + @view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method, backend ) end @@ -166,7 +168,8 @@ function run_sim_time_iter!( seq::DiscreteSequence, sig::AbstractArray{Complex{T}}, Xt::SpinStateRepresentation{T}, - sim_method::SimulationMethod; + sim_method::SimulationMethod, + backend::KA.Backend; Nblocks=1, Nthreads=Threads.nthreads(), parts=[1:length(seq)], @@ -193,7 +196,7 @@ function run_sim_time_iter!( rfs += 1 else run_spin_precession_parallel!( - obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method; Nthreads + obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method, backend; Nthreads ) end samples += Nadc @@ -357,10 +360,10 @@ function simulate( if backend isa KA.GPU isnothing(sim_params["gpu_device"]) || set_device!(backend, sim_params["gpu_device"]) gpu_name = device_name(backend) - obj = gpu(obj, backend) #Phantom - seqd = gpu(seqd, backend) #DiscreteSequence - Xt = gpu(Xt, backend) #SpinStateRepresentation - sig = gpu(sig, backend) #Signal + obj = obj |> gpu #Phantom + seqd = seqd |> gpu #DiscreteSequence + Xt = Xt |> gpu #SpinStateRepresentation + sig = sig |> gpu #Signal end # Simulation @@ -373,7 +376,8 @@ function simulate( seqd, sig, Xt, - sim_params["sim_method"]; + sim_params["sim_method"], + backend; Nblocks=length(parts), Nthreads=sim_params["Nthreads"], parts, diff --git a/KomaMRICore/test/Project.toml b/KomaMRICore/test/Project.toml index 8abe380c8..30afdd767 100644 --- a/KomaMRICore/test/Project.toml +++ b/KomaMRICore/test/Project.toml @@ -1,5 +1,6 @@ [deps] HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" KomaMRIBase = "d0bc0b20-b151-4d03-b2a4-6ca51751cb9c" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" diff --git a/KomaMRICore/test/runtests.jl b/KomaMRICore/test/runtests.jl index c6dd5900d..315d37f1a 100644 --- a/KomaMRICore/test/runtests.jl +++ b/KomaMRICore/test/runtests.jl @@ -1,8 +1,9 @@ using TestItems, TestItemRunner ### NOTE: by default, tests are run on the CPU with the number of threads set to -# Threads.nthreads(). To run on a specific GPU backend, pass the name of the -# trigger package ("AMDGPU", "CUDA", "Metal", or "oneAPI") as a test argument. +# Threads.nthreads(). To run on a specific GPU backend, add the name of the +# backend package ("AMDGPU", "CUDA", "Metal", or "oneAPI") to the test/Project.toml +# file in KomaMRICore and pass the name as a test argument. # # Example: # @@ -385,3 +386,28 @@ end # For the time being, always pass the test @test true end + +@testitem "GPU Functions" tags=[:core] begin + using Suppressor + import KernelAbstractions as KA + include("initialize.jl") + + x = ones(Float32, 1000) + @suppress begin + y = x |> gpu + if USE_GPU + @test KA.get_backend(y) isa KA.GPU + y = y |> cpu + @test KA.get_backend(y) isa KA.CPU + else + # Test that gpu and cpu are no-ops + @test y == x + y = y |> cpu + @test y == x + end + end + + + @suppress print_devices() + @test true +end \ No newline at end of file