Skip to content

Commit

Permalink
Fixes loading of extensions (#32)
Browse files Browse the repository at this point in the history
* Add missing using

* Fix rng usage and remove machine argument

* Add runtime generated functions init to extensions

* Fix github docs link

* Add more docs to the kernel generation and add AMDGPU kernel

* WIP fix GPU ext

* Make the extensions load correctly

* Update Numa impl too

* Add warning that GPUs are experimental

* Include new file in the docs
  • Loading branch information
AntonReinhard authored Sep 17, 2024
1 parent 857c322 commit 695bdb6
Show file tree
Hide file tree
Showing 16 changed files with 167 additions and 125 deletions.
2 changes: 1 addition & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ makedocs(;
sitename="ComputableDAGs.jl",
format=Documenter.HTML(;
prettyurls=get(ENV, "CI", "false") == "true",
canonical="https://ComputableDAGs.gitlab.io/ComputableDAGs.jl",
canonical="https://ComputableDAGs.github.io/ComputableDAGs.jl",
assets=String[],
),
pages=pages,
Expand Down
13 changes: 6 additions & 7 deletions docs/src/lib/internals/devices.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,10 @@ Pages = ["devices/numa/impl.jl"]
Order = [:type, :function]
```

### CUDA
For CUDA functionality to be available, the `CUDA.jl` package must be installed separately, as it is only a weak dependency.
### GPUs

### ROCm
For ROCm functionality to be available, the `AMDGPU.jl` package must be installed separately, as it is only a weak dependency.

### oneAPI
For oneAPI functionality to be available, the `oneAPI.jl` package must be installed separately, as it is only a weak dependency.
```@autodocs
Modules = [ComputableDAGs]
Pages = ["devices/ext.jl"]
Order = [:type]
```
14 changes: 13 additions & 1 deletion ext/AMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
module AMDGPUExt

using ComputableDAGs, AMDGPU
using ComputableDAGs
using UUIDs
using AMDGPU

# Extension entry point, run when AMDGPU.jl and ComputableDAGs are both loaded.
# Registers the ROCm GPU device type with ComputableDAGs' device machinery.
function __init__()
@debug "Loading AMDGPUExt"

# make ROCmGPU discoverable by ComputableDAGs' device detection
push!(ComputableDAGs.DEVICE_TYPES, ROCmGPU)
# register the cache strategies available for this device type
ComputableDAGs.CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]

return nothing
end

# include specialized AMDGPU functions here
include("devices/rocm/impl.jl")
include("devices/rocm/function.jl")

end
11 changes: 10 additions & 1 deletion ext/CUDAExt.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
module CUDAExt

using ComputableDAGs
using UUIDs
using CUDA
using RuntimeGeneratedFunctions

# Extension entry point, run when CUDA.jl and ComputableDAGs are both loaded.
# Registers the CUDA GPU device type with ComputableDAGs' device machinery.
function __init__()
@debug "Loading CUDAExt"

# make CUDAGPU discoverable by ComputableDAGs' device detection
push!(ComputableDAGs.DEVICE_TYPES, CUDAGPU)
# register the cache strategies available for this device type
ComputableDAGs.CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]

return nothing
end

# include specialized CUDA functions here
include("devices/cuda/impl.jl")
Expand Down
10 changes: 4 additions & 6 deletions ext/devices/cuda/function.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@

function ComputableDAGs.cuda_kernel(
graph::DAG, instance, machine::Machine, context_module::Module
)
function ComputableDAGs.kernel(::Type{CUDAGPU}, graph::DAG, instance)
machine = cpu_st()
tape = ComputableDAGs.gen_tape(graph, instance, machine, context_module)

init_caches = Expr(:block, tape.initCachesCode...)
assign_inputs = Expr(:block, ComputableDAGs.expr_from_fc.(tape.inputAssignCode)...)
code = Expr(:block, ComputableDAGs.expr_from_fc.(tape.computeCode)...)

function_id = ComputableDAGs.to_var_name(UUIDs.uuid1(rng[1]))
function_id = ComputableDAGs.to_var_name(UUIDs.uuid1(ComputableDAGs.rng[1]))
res_sym = eval(
ComputableDAGs.gen_access_expr(
ComputableDAGs.entry_device(tape.machine), tape.outputSymbol
Expand All @@ -29,5 +27,5 @@ function ComputableDAGs.cuda_kernel(
end"
)

return RuntimeGeneratedFunction(@__MODULE__, context_module, expr)
return expr
end
29 changes: 4 additions & 25 deletions ext/devices/cuda/impl.jl
Original file line number Diff line number Diff line change
@@ -1,24 +1,7 @@
"""
CUDAGPU <: AbstractGPU
Representation of a specific CUDA GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct CUDAGPU <: ComputableDAGs.AbstractGPU
device::Any # TODO: what's the cuda device type?
cacheStrategy::CacheStrategy
FLOPS::Float64
end

push!(ComputableDAGs.DEVICE_TYPES, CUDAGPU)

ComputableDAGs.CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]

ComputableDAGs.default_strategy(::Type{CUDAGPU}) = LocalVariables()

function ComputableDAGs.measure_device!(device::CUDAGPU; verbose::Bool)
if verbose
println("Measuring CUDA GPU $(device.device)")
end
verbose && @info "Measuring CUDA GPU $(device.device)"

# TODO implement
return nothing
Expand All @@ -29,20 +12,16 @@ end
Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(::Type{CUDAGPU}; verbose::Bool=false)
function ComputableDAGs.get_devices(::Type{CUDAGPU}; verbose::Bool=false)
devices = Vector{ComputableDAGs.AbstractDevice}()

if !CUDA.functional()
if verbose
println("CUDA.jl is non-functional")
end
@warn "The CUDA extension is loaded but CUDA.jl is non-functional"
return devices
end

CUDADevices = CUDA.devices()
if verbose
println("Found $(length(CUDADevices)) CUDA devices")
end
verbose && @info "Found $(length(CUDADevices)) CUDA devices"
for device in CUDADevices
push!(devices, CUDAGPU(device, default_strategy(CUDAGPU), -1))
end
Expand Down
31 changes: 5 additions & 26 deletions ext/devices/oneapi/impl.jl
Original file line number Diff line number Diff line change
@@ -1,24 +1,7 @@
"""
oneAPIGPU <: AbstractGPU
Representation of a specific Intel GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct oneAPIGPU <: ComputableDAGs.AbstractGPU
device::Any
cacheStrategy::CacheStrategy
FLOPS::Float64
end

push!(ComputableDAGs.DEVICE_TYPES, oneAPIGPU)

ComputableDAGs.CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]

ComputableDAGs.default_strategy(::Type{oneAPIGPU}) = LocalVariables()

function ComputableDAGs.measure_device!(device::oneAPIGPU; verbose::Bool)
if verbose
println("Measuring oneAPI GPU $(device.device)")
end
verbose && @info "Measuring oneAPI GPU $(device.device)"

# TODO implement
return nothing
Expand All @@ -29,20 +12,16 @@ end
Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(::Type{oneAPIGPU}; verbose::Bool=false)
devices = Vector{AbstractDevice}()
function ComputableDAGs.get_devices(::Type{oneAPIGPU}; verbose::Bool=false)
devices = Vector{ComputableDAGs.AbstractDevice}()

if !oneAPI.functional()
if verbose
println("oneAPI is non-functional")
end
@warn "the oneAPI extension is loaded but oneAPI.jl is non-functional"
return devices
end

oneAPIDevices = oneAPI.devices()
if verbose
println("Found $(length(oneAPIDevices)) oneAPI devices")
end
verbose && @info "Found $(length(oneAPIDevices)) oneAPI devices"
for device in oneAPIDevices
push!(devices, oneAPIGPU(device, default_strategy(oneAPIGPU), -1))
end
Expand Down
31 changes: 31 additions & 0 deletions ext/devices/rocm/function.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
    ComputableDAGs.kernel(::Type{ROCmGPU}, graph::DAG, instance)

Generate a ROCm (AMDGPU.jl) kernel expression that computes the given `graph`
for the given problem `instance`. Returns an `Expr` defining a kernel function
`compute_<id>(input_vector, output_vector, n::Int64)` where each work item
reads `input_vector[id]`, evaluates the tape, and writes `output_vector[id]`.
Work items with `id > n` return early without touching memory.
"""
function ComputableDAGs.kernel(::Type{ROCmGPU}, graph::DAG, instance)
    machine = cpu_st()
    # NOTE(review): `context_module` is neither a parameter nor a local of this
    # function — confirm it is defined in the enclosing extension module,
    # otherwise this call throws an UndefVarError at runtime.
    tape = ComputableDAGs.gen_tape(graph, instance, machine, context_module)

    init_caches = Expr(:block, tape.initCachesCode...)
    assign_inputs = Expr(:block, ComputableDAGs.expr_from_fc.(tape.inputAssignCode)...)
    code = Expr(:block, ComputableDAGs.expr_from_fc.(tape.computeCode)...)

    # unique function name suffix so repeated kernel generation cannot collide
    function_id = ComputableDAGs.to_var_name(UUIDs.uuid1(ComputableDAGs.rng[1]))
    res_sym = eval(
        ComputableDAGs.gen_access_expr(
            ComputableDAGs.entry_device(tape.machine), tape.outputSymbol
        ),
    )
    expr = Meta.parse(
        "function compute_$(function_id)(input_vector, output_vector, n::Int64)
            # global work-item index: (group - 1) * group size + index within
            # the group. The second term must be workitemIdx(), not the group
            # index again, or all work items of a group alias the same element.
            id = (workgroupIdx().x - 1) * workgroupDim().x + workitemIdx().x
            if (id > n)
                return
            end
            @inline data_input = input_vector[id]
            $(init_caches)
            $(assign_inputs)
            $code
            @inline output_vector[id] = $res_sym
            return nothing
        end"
    )

    return expr
end
33 changes: 5 additions & 28 deletions ext/devices/rocm/impl.jl
Original file line number Diff line number Diff line change
@@ -1,26 +1,7 @@
using AMDGPU

"""
ROCmGPU <: AbstractGPU
Representation of a specific AMD GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
"""
mutable struct ROCmGPU <: ComputableDAGs.AbstractGPU
device::Any
cacheStrategy::CacheStrategy
FLOPS::Float64
end

push!(ComputableDAGs.DEVICE_TYPES, ROCmGPU)

ComputableDAGs.CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]

ComputableDAGs.default_strategy(::Type{ROCmGPU}) = LocalVariables()

function ComputableDAGs.measure_device!(device::ROCmGPU; verbose::Bool)
if verbose
println("Measuring ROCm GPU $(device.device)")
end
verbose && @info "Measuring ROCm GPU $(device.device)"

# TODO implement
return nothing
Expand All @@ -31,20 +12,16 @@ end
Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
"""
function get_devices(::Type{ROCmGPU}; verbose::Bool=false)
devices = Vector{AbstractDevice}()
function ComputableDAGs.get_devices(::Type{ROCmGPU}; verbose::Bool=false)
devices = Vector{ComputableDAGs.AbstractDevice}()

if !AMDGPU.functional()
if verbose
println("AMDGPU is non-functional")
end
@warn "The AMDGPU extension is loaded but AMDGPU.jl is non-functional"
return devices
end

AMDDevices = AMDGPU.devices()
if verbose
println("Found $(length(AMDDevices)) AMD devices")
end
verbose && @info "Found $(length(AMDDevices)) AMD devices"
for device in AMDDevices
push!(devices, ROCmGPU(device, default_strategy(ROCmGPU), -1))
end
Expand Down
13 changes: 12 additions & 1 deletion ext/oneAPIExt.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
module oneAPIExt

using ComputableDAGs, oneAPI
using ComputableDAGs
using UUIDs
using oneAPI

# Extension entry point, run when oneAPI.jl and ComputableDAGs are both loaded.
# Registers the Intel GPU device type with ComputableDAGs' device machinery.
function __init__()
@debug "Loading oneAPIExt"

# make oneAPIGPU discoverable by ComputableDAGs' device detection
push!(ComputableDAGs.DEVICE_TYPES, oneAPIGPU)
# register the cache strategies available for this device type
ComputableDAGs.CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]

return nothing
end

# include specialized oneAPI functions here
include("devices/oneapi/impl.jl")
Expand Down
5 changes: 3 additions & 2 deletions src/ComputableDAGs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ export get_machine_info, cpu_st
export CacheStrategy, default_strategy
export LocalVariables, Dictionary

# CUDAExt
export cuda_kernel
# GPU Extensions
export kernel, CUDAGPU, ROCmGPU, oneAPIGPU

include("devices/interface.jl")
include("task/type.jl")
Expand Down Expand Up @@ -124,6 +124,7 @@ include("devices/detect.jl")
include("devices/impl.jl")

include("devices/numa/impl.jl")
include("devices/ext.jl")

include("scheduler/interface.jl")
include("scheduler/greedy.jl")
Expand Down
44 changes: 44 additions & 0 deletions src/devices/ext.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# file for struct definitions used by the extensions
# since extensions can't export names themselves

"""
    CUDAGPU <: AbstractGPU

Representation of a specific CUDA GPU that code can run on. Implements the
[`AbstractDevice`](@ref) interface.

!!! note
    This requires CUDA to be loaded to be useful.
"""
mutable struct CUDAGPU <: AbstractGPU
device::Any # the underlying CUDA.CuDevice; `Any` so CUDA.jl stays a weak dep
cacheStrategy::CacheStrategy
FLOPS::Float64 # measured throughput; -1 until measured — TODO confirm sentinel
end

"""
    oneAPIGPU <: AbstractGPU

Representation of a specific Intel GPU that code can run on. Implements the
[`AbstractDevice`](@ref) interface.

!!! note
    This requires oneAPI to be loaded to be useful.
"""
mutable struct oneAPIGPU <: AbstractGPU
device::Any # the underlying oneAPI.oneL0.ZeDevice; `Any` so oneAPI.jl stays a weak dep
cacheStrategy::CacheStrategy
FLOPS::Float64 # measured throughput; -1 until measured — TODO confirm sentinel
end

"""
    ROCmGPU <: AbstractGPU

Representation of a specific AMD GPU that code can run on. Implements the
[`AbstractDevice`](@ref) interface.

!!! note
    This requires AMDGPU to be loaded to be useful.
"""
mutable struct ROCmGPU <: AbstractGPU
device::Any # the underlying AMDGPU.HIPDevice; `Any` so AMDGPU.jl stays a weak dep
cacheStrategy::CacheStrategy
FLOPS::Float64 # measured throughput; -1 until measured — TODO confirm sentinel
end
11 changes: 1 addition & 10 deletions src/devices/impl.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,6 @@ It is the simplest machine definition possible and produces a simple function wh
"""
function cpu_st()
return Machine(
[
ComputableDAGs.NumaNode(
0,
1,
ComputableDAGs.default_strategy(ComputableDAGs.NumaNode),
-1.0,
UUIDs.uuid1(),
),
],
[-1.0;;],
[NumaNode(0, 1, default_strategy(NumaNode), -1.0, UUIDs.uuid1())], [-1.0;;]
)
end
Loading

0 comments on commit 695bdb6

Please sign in to comment.