Add kernel-based matrix cumsum #416

Merged · 6 commits · Jun 21, 2024
2 changes: 1 addition & 1 deletion KomaMRICore/ext/KomaAMDGPUExt.jl
@@ -18,7 +18,7 @@ end

function KomaMRICore._print_devices(::ROCBackend)
devices = [
Symbol("($(i-1)$(i == 1 ? "*" : " "))") => d.name for
Symbol("($(i-1)$(i == 1 ? "*" : " "))") => AMDGPU.HIP.name(d) for
(i, d) in enumerate(AMDGPU.devices())
]
@info "$(length(AMDGPU.devices())) AMD capable device(s)." devices...
KomaMRICore/src/simulation/Bloch/BlochDictSimulationMethod.jl
@@ -35,6 +35,7 @@ function run_spin_precession!(
sig::AbstractArray{Complex{T}},
M::Mag{T},
sim_method::BlochDict,
backend::KA.Backend
) where {T<:Real}
#Simulation
#Motion
@@ -43,7 +44,7 @@ function run_spin_precession!(
Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ)
#Rotation
if is_ADC_on(seq)
- ϕ = T(-2π * γ) .* cumtrapz(seq.Δt', Bz)
+ ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
else
ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz)
end
3 changes: 2 additions & 1 deletion KomaMRICore/src/simulation/Bloch/BlochSimulationMethod.jl
@@ -44,6 +44,7 @@ function run_spin_precession!(
sig::AbstractArray{Complex{T}},
M::Mag{T},
sim_method::SimulationMethod,
backend::KA.Backend
) where {T<:Real}
#Simulation
#Motion
@@ -52,7 +53,7 @@ function run_spin_precession!(
Bz = x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ p.Δw / T(2π * γ)
#Rotation
if is_ADC_on(seq)
- ϕ = T(-2π * γ) .* cumtrapz(seq.Δt', Bz)
+ ϕ = T(-2π * γ) .* KomaMRIBase.cumtrapz(seq.Δt', Bz, backend)
else
ϕ = T(-2π * γ) .* trapz(seq.Δt', Bz)
end
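For intuition about the branch above (which appears in both simulation methods), here is a plain-Julia sketch of the difference between the two integrals. It mirrors the trapezoid construction used by the new kernel but is illustrative only, not the package's internals, and the array values are made up:

```julia
Δt = fill(1f-5, 1, 4)                                   # 1 x NΔt       [s]
Bz = rand(Float32, 3, 5)                                # Ns x (NΔt+1)  [T]
steps = (Bz[:, 2:end] .+ Bz[:, 1:end-1]) .* (Δt ./ 2)   # per-interval trapezoid areas
ϕ_cum = cumsum(steps; dims=2)   # what cumtrapz returns: phase at every ADC sample (Ns x NΔt)
ϕ_tot = sum(steps; dims=2)      # what trapz returns: total phase over the block (Ns x 1)
```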
48 changes: 48 additions & 0 deletions KomaMRICore/src/simulation/Bloch/KernelFunctions.jl
@@ -0,0 +1,48 @@
using KernelAbstractions: @index, @kernel
Review comment by @cncastillo (Member), Jun 21, 2024:

Can you check if the newly added CPU backend adds codecov for the kernel?

Also, add the comments to exclude the coverage for the Metal extension.

Reply from the author (Collaborator):

I don't think it does, since on the CPU it forwards the call to cumtrapz in KomaMRIBase. I forgot about code coverage not working for Metal, so I added the comments to exclude it.


## COV_EXCL_START

"""
cumsum_matrix_rows_kernel!

Simple kernel function that computes the cumulative sum of each row of a matrix. Operates
in-place on the input matrix without allocating additional memory.

# Arguments
- `A`: matrix whose rows are replaced in-place by their cumulative sums
"""
@kernel function cumsum_matrix_rows_kernel!(A)
i = @index(Global)

for k ∈ 2:size(A, 2)
@inbounds A[i, k] += A[i, k-1]
end
end

## COV_EXCL_STOP

"""
cumtrapz

A more efficient GPU implementation of the cumtrapz method defined in TrapezoidalIntegration.jl.
Uses a kernel to compute cumsum along the second dimension.

# Arguments
- `Δt`: (`1 x NΔt ::Matrix{Float64}`, `[s]`) delta time 1-row array
- `x`: (`Ns x (NΔt+1) ::Matrix{Float64}`, `[T]`) magnitude of the field Gx * x + Gy * y +
Gz * z

# Returns
- `y`: (`Ns x NΔt ::Matrix{Float64}`, `[T*s]`) matrix where every column is the
cumulative integration over time of (Gx * x + Gy * y + Gz * z) * Δt for every spin of a
phantom
"""
function KomaMRIBase.cumtrapz(Δt::AbstractArray{T}, x::AbstractArray{T}, backend::KA.GPU) where {T<:Real}
y = (x[:, 2:end] .+ x[:, 1:end-1]) .* (Δt / 2)
cumsum_matrix_rows_kernel!(backend)(y, ndrange=size(y,1))
KA.synchronize(backend)
return y
end

# If on the CPU, forward the call to cumtrapz in KomaMRIBase
KomaMRIBase.cumtrapz(Δt::AbstractArray{T}, x::AbstractArray{T}, backend::KA.CPU) where {T<:Real} = KomaMRIBase.cumtrapz(Δt, x)
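As a usage note, a minimal sketch of exercising the kernel above through KernelAbstractions' CPU backend (illustrative only: the values are made up, and the kernel is assumed to be in scope since it is not exported by KomaMRICore):

```julia
using KernelAbstractions

A = Float32[1 2 3; 4 5 6]                  # cumulative sum is taken along each row
backend = CPU()
cumsum_matrix_rows_kernel!(backend)(A, ndrange=size(A, 1))   # one work-item per row
KernelAbstractions.synchronize(backend)
A                                          # now [1 3 6; 4 9 15]
```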
2 changes: 1 addition & 1 deletion KomaMRICore/src/simulation/Functors.jl
@@ -22,7 +22,7 @@ See also [`f32`](@ref) and [`f64`](@ref) to change element type only.
# Examples
```julia
using CUDA
- x = gpu(x)
+ x = x |> gpu
```
"""
function gpu(x)
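A hedged round-trip sketch for the piped form now shown in the docstring (it assumes CUDA.jl is installed and functional, and that `cpu` is the matching host-transfer helper exported alongside `gpu`):

```julia
using CUDA, KomaMRICore

x  = rand(Float32, 32)
xg = x |> gpu     # moved to the device (a CuArray when CUDA is functional)
xh = xg |> cpu    # brought back to a plain host Array
```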
22 changes: 13 additions & 9 deletions KomaMRICore/src/simulation/SimulatorCore.jl
@@ -2,6 +2,7 @@ abstract type SimulationMethod end #get all available types by using subtypes(Ko
abstract type SpinStateRepresentation{T<:Real} end #get all available types by using subtypes(KomaMRI.SpinStateRepresentation)

#Defined methods:
include("Bloch/KernelFunctions.jl")
include("Bloch/BlochSimulationMethod.jl") #Defines Bloch simulation method
include("Bloch/BlochDictSimulationMethod.jl") #Defines BlochDict simulation method

@@ -82,15 +83,16 @@ function run_spin_precession_parallel!(
seq::DiscreteSequence{T},
sig::AbstractArray{Complex{T}},
Xt::SpinStateRepresentation{T},
- sim_method::SimulationMethod;
+ sim_method::SimulationMethod,
+ backend::KA.Backend;
Nthreads=Threads.nthreads(),
) where {T<:Real}
parts = kfoldperm(length(obj), Nthreads)
dims = [Colon() for i in 1:(ndims(sig) - 1)] # :,:,:,... Ndim times

ThreadsX.foreach(enumerate(parts)) do (i, p)
run_spin_precession!(
- @view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method
+ @view(obj[p]), seq, @view(sig[dims..., i]), @view(Xt[p]), sim_method, backend
)
end

@@ -166,7 +168,8 @@ function run_sim_time_iter!(
seq::DiscreteSequence,
sig::AbstractArray{Complex{T}},
Xt::SpinStateRepresentation{T},
- sim_method::SimulationMethod;
+ sim_method::SimulationMethod,
+ backend::KA.Backend;
Nblocks=1,
Nthreads=Threads.nthreads(),
parts=[1:length(seq)],
@@ -193,7 +196,7 @@
rfs += 1
else
run_spin_precession_parallel!(
- obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method; Nthreads
+ obj, seq_block, @view(sig[acq_samples, dims...]), Xt, sim_method, backend; Nthreads
)
end
samples += Nadc
@@ -357,10 +360,10 @@ function simulate(
if backend isa KA.GPU
isnothing(sim_params["gpu_device"]) || set_device!(backend, sim_params["gpu_device"])
gpu_name = device_name(backend)
- obj = gpu(obj, backend) #Phantom
- seqd = gpu(seqd, backend) #DiscreteSequence
- Xt = gpu(Xt, backend) #SpinStateRepresentation
- sig = gpu(sig, backend) #Signal
+ obj = obj |> gpu #Phantom
+ seqd = seqd |> gpu #DiscreteSequence
+ Xt = Xt |> gpu #SpinStateRepresentation
+ sig = sig |> gpu #Signal
end

# Simulation
@@ -373,7 +376,8 @@
seqd,
sig,
Xt,
sim_params["sim_method"];
sim_params["sim_method"],
backend;
Nblocks=length(parts),
Nthreads=sim_params["Nthreads"],
parts,
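To make the new plumbing concrete, a sketch of the dispatch that the added `backend` argument enables (array shapes follow the cumtrapz docstring; loading KomaMRICore is assumed since it owns the three-argument method, and CUDABackend is named only as an example of a GPU backend):

```julia
import KernelAbstractions as KA
import KomaMRIBase
using KomaMRICore                              # defines cumtrapz(Δt, x, ::KA.Backend)

Δt = fill(1f-5, 1, 4)                          # 1 x NΔt       [s]
Bz = rand(Float32, 3, 5)                       # Ns x (NΔt+1)  [T]
ϕ  = KomaMRIBase.cumtrapz(Δt, Bz, KA.CPU())    # CPU backend forwards to the 2-argument method
# With a GPU backend (e.g. CUDABackend() from CUDA.jl) the same call takes the kernel path.
```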
1 change: 1 addition & 0 deletions KomaMRICore/test/Project.toml
@@ -1,5 +1,6 @@
[deps]
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
KomaMRIBase = "d0bc0b20-b151-4d03-b2a4-6ca51751cb9c"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
22 changes: 20 additions & 2 deletions KomaMRICore/test/runtests.jl
@@ -1,8 +1,9 @@
using TestItems, TestItemRunner

### NOTE: by default, tests are run on the CPU with the number of threads set to
- # Threads.nthreads(). To run on a specific GPU backend, pass the name of the
- # trigger package ("AMDGPU", "CUDA", "Metal", or "oneAPI") as a test argument.
+ # Threads.nthreads(). To run on a specific GPU backend, add the name of the
+ # backend package ("AMDGPU", "CUDA", "Metal", or "oneAPI") to the test/Project.toml
+ # file in KomaMRICore and pass the name as a test argument.
#
# Example:
#
@@ -385,3 +386,20 @@ end
# For the time being, always pass the test
@test true
end
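A hedged sketch of the workflow described in the NOTE at the top of runtests.jl (the exact invocation is an assumption; only the backend package names listed there come from the PR):

```julia
# After adding e.g. CUDA to KomaMRICore/test/Project.toml, pass the backend name
# through Pkg's test_args so runtests.jl can pick that backend up.
import Pkg
Pkg.test("KomaMRICore"; test_args=["CUDA"])
```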

@testitem "GPU Functions" tags=[:core] begin
using Suppressor
import KernelAbstractions as KA
include("initialize.jl")

x = ones(Float32, 1000)
if USE_GPU
x = x |> gpu
@test KA.get_backend(x) isa KA.GPU
else
@test true
end
Review comment by @cncastillo (Member), Jun 21, 2024:

I think it would be good to check that:

- with USE_GPU = true: cpu(gpu(x)) is a CPU array
- with USE_GPU = false: cpu and gpu are no-ops

@suppress print_devices()
@test true
end
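A sketch of the extra assertions suggested in the review comment above (it assumes `cpu` and `gpu` are exported by KomaMRICore and that `USE_GPU` comes from initialize.jl, as in the test item; the exact no-op behaviour of the CPU path is an assumption):

```julia
x = ones(Float32, 1000)
if USE_GPU
    @test KA.get_backend(x |> gpu) isa KA.GPU   # moved to the device
    @test cpu(gpu(x)) isa Array                 # round trip lands back on the host
else
    @test (x |> gpu) == x                       # gpu should be (effectively) a no-op
    @test (x |> cpu) == x                       # and so should cpu
end
```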