Fixes loading of extensions #32

Merged · 10 commits · Sep 17, 2024
docs/make.jl (1 addition, 1 deletion)

@@ -32,7 +32,7 @@ makedocs(;
     sitename="ComputableDAGs.jl",
     format=Documenter.HTML(;
         prettyurls=get(ENV, "CI", "false") == "true",
-        canonical="https://ComputableDAGs.gitlab.io/ComputableDAGs.jl",
+        canonical="https://ComputableDAGs.github.io/ComputableDAGs.jl",
         assets=String[],
     ),
     pages=pages,
docs/src/lib/internals/devices.md (6 additions, 7 deletions)

@@ -37,11 +37,10 @@ Pages = ["devices/numa/impl.jl"]
 Order = [:type, :function]
 ```

-### CUDA
-For CUDA functionality to be available, the `CUDA.jl` package must be installed separately, as it is only a weak dependency.
+### GPUs

-### ROCm
-For ROCm functionality to be available, the `AMDGPU.jl` package must be installed separately, as it is only a weak dependency.
-
-### oneAPI
-For oneAPI functionality to be available, the `oneAPI.jl` package must be installed separately, as it is only a weak dependency.
+```@autodocs
+Modules = [ComputableDAGs]
+Pages = ["devices/ext.jl"]
+Order = [:type]
+```
ext/AMDGPUExt.jl (13 additions, 1 deletion)

@@ -1,8 +1,20 @@
 module AMDGPUExt

-using ComputableDAGs, AMDGPU
+using ComputableDAGs
+using UUIDs
+using AMDGPU
+
+function __init__()
+    @debug "Loading AMDGPUExt"
+
+    push!(ComputableDAGs.DEVICE_TYPES, ROCmGPU)
+    ComputableDAGs.CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]
+
+    return nothing
+end

 # include specialized AMDGPU functions here
 include("devices/rocm/impl.jl")
+include("devices/rocm/function.jl")

 end
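The extension now registers its device type at load time via `__init__` rather than at include time. A hypothetical REPL session illustrating the effect (on a machine with AMDGPU.jl installed):

```julia
# Hypothetical session: loading AMDGPU triggers AMDGPUExt's __init__, which
# registers ROCmGPU with ComputableDAGs at runtime.
using ComputableDAGs

ROCmGPU in ComputableDAGs.DEVICE_TYPES  # false: the extension is not loaded yet

using AMDGPU                            # loads AMDGPUExt and runs its __init__

ROCmGPU in ComputableDAGs.DEVICE_TYPES  # true: the device type is now registered
```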
ext/CUDAExt.jl (10 additions, 1 deletion)

@@ -1,8 +1,17 @@
 module CUDAExt

 using ComputableDAGs
+using UUIDs
 using CUDA
-using RuntimeGeneratedFunctions

+function __init__()
+    @debug "Loading CUDAExt"
+
+    push!(ComputableDAGs.DEVICE_TYPES, CUDAGPU)
+    ComputableDAGs.CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
+
+    return nothing
+end
+
 # include specialized CUDA functions here
 include("devices/cuda/impl.jl")
ext/devices/cuda/function.jl (4 additions, 6 deletions)

@@ -1,14 +1,12 @@
-
-function ComputableDAGs.cuda_kernel(
-    graph::DAG, instance, machine::Machine, context_module::Module
-)
+function ComputableDAGs.kernel(::Type{CUDAGPU}, graph::DAG, instance)
+    machine = cpu_st()
     tape = ComputableDAGs.gen_tape(graph, instance, machine, context_module)

     init_caches = Expr(:block, tape.initCachesCode...)
     assign_inputs = Expr(:block, ComputableDAGs.expr_from_fc.(tape.inputAssignCode)...)
     code = Expr(:block, ComputableDAGs.expr_from_fc.(tape.computeCode)...)

-    function_id = ComputableDAGs.to_var_name(UUIDs.uuid1(rng[1]))
+    function_id = ComputableDAGs.to_var_name(UUIDs.uuid1(ComputableDAGs.rng[1]))
     res_sym = eval(
         ComputableDAGs.gen_access_expr(
             ComputableDAGs.entry_device(tape.machine), tape.outputSymbol
@@ -29,5 +27,5 @@ function ComputableDAGs.cuda_kernel(
         end"
     )

-    return RuntimeGeneratedFunction(@__MODULE__, context_module, expr)
+    return expr
 end
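With the `RuntimeGeneratedFunction` wrapper gone, `kernel` now hands back a plain `Expr` and leaves it to the caller to decide where to evaluate it. A hedged sketch of what consuming the new API might look like; `graph`, `instance`, `input_vector`, and the element type `ResultType` are assumptions, not names from this PR:

```julia
using ComputableDAGs, CUDA

# kernel returns an Expr defining `compute_<id>(input_vector, output_vector, n::Int64)`
kernel_expr = kernel(CUDAGPU, graph, instance)
kernel_f = eval(kernel_expr)            # compile the expression in the current module

n = length(input_vector)
cu_in = CuArray(input_vector)           # move inputs to the device
cu_out = CuArray{ResultType}(undef, n)  # ResultType is an assumption

# one thread per input element; 256 threads per block is an arbitrary choice
@cuda threads = 256 blocks = cld(n, 256) kernel_f(cu_in, cu_out, n)
```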
ext/devices/cuda/impl.jl (4 additions, 25 deletions)

@@ -1,24 +1,7 @@
-"""
-    CUDAGPU <: AbstractGPU
-
-Representation of a specific CUDA GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
-"""
-mutable struct CUDAGPU <: ComputableDAGs.AbstractGPU
-    device::Any # TODO: what's the cuda device type?
-    cacheStrategy::CacheStrategy
-    FLOPS::Float64
-end
-
-push!(ComputableDAGs.DEVICE_TYPES, CUDAGPU)
-
-ComputableDAGs.CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()]
-
 ComputableDAGs.default_strategy(::Type{CUDAGPU}) = LocalVariables()

 function ComputableDAGs.measure_device!(device::CUDAGPU; verbose::Bool)
-    if verbose
-        println("Measuring CUDA GPU $(device.device)")
-    end
+    verbose && @info "Measuring CUDA GPU $(device.device)"

     # TODO implement
     return nothing
@@ -29,20 +12,16 @@ end

 Return a Vector of [`CUDAGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
 """
-function get_devices(::Type{CUDAGPU}; verbose::Bool=false)
+function ComputableDAGs.get_devices(::Type{CUDAGPU}; verbose::Bool=false)
     devices = Vector{ComputableDAGs.AbstractDevice}()

     if !CUDA.functional()
-        if verbose
-            println("CUDA.jl is non-functional")
-        end
+        @warn "The CUDA extension is loaded but CUDA.jl is non-functional"
         return devices
     end

     CUDADevices = CUDA.devices()
-    if verbose
-        println("Found $(length(CUDADevices)) CUDA devices")
-    end
+    verbose && @info "Found $(length(CUDADevices)) CUDA devices"
     for device in CUDADevices
         push!(devices, CUDAGPU(device, default_strategy(CUDAGPU), -1))
     end
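This hunk contains the core of the loading fix: the extension previously defined a *new* local `get_devices` in its own module instead of adding a method to `ComputableDAGs.get_devices`, so the method was unreachable from the parent package. A hypothetical session exercising the corrected method (requires a machine with CUDA.jl installed):

```julia
using ComputableDAGs, CUDA

# Before this PR this method did not exist on ComputableDAGs.get_devices;
# now the extension extends the parent package's function, so it dispatches.
gpus = ComputableDAGs.get_devices(CUDAGPU; verbose=true)
```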
ext/devices/oneapi/impl.jl (5 additions, 26 deletions)

@@ -1,24 +1,7 @@
-"""
-    oneAPIGPU <: AbstractGPU
-
-Representation of a specific Intel GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
-"""
-mutable struct oneAPIGPU <: ComputableDAGs.AbstractGPU
-    device::Any
-    cacheStrategy::CacheStrategy
-    FLOPS::Float64
-end
-
-push!(ComputableDAGs.DEVICE_TYPES, oneAPIGPU)
-
-ComputableDAGs.CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]
-
 ComputableDAGs.default_strategy(::Type{oneAPIGPU}) = LocalVariables()

 function ComputableDAGs.measure_device!(device::oneAPIGPU; verbose::Bool)
-    if verbose
-        println("Measuring oneAPI GPU $(device.device)")
-    end
+    verbose && @info "Measuring oneAPI GPU $(device.device)"

     # TODO implement
     return nothing
@@ -29,20 +12,16 @@ end

 Return a Vector of [`oneAPIGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
 """
-function get_devices(::Type{oneAPIGPU}; verbose::Bool=false)
-    devices = Vector{AbstractDevice}()
+function ComputableDAGs.get_devices(::Type{oneAPIGPU}; verbose::Bool=false)
+    devices = Vector{ComputableDAGs.AbstractDevice}()

     if !oneAPI.functional()
-        if verbose
-            println("oneAPI is non-functional")
-        end
+        @warn "the oneAPI extension is loaded but oneAPI.jl is non-functional"
         return devices
     end

     oneAPIDevices = oneAPI.devices()
-    if verbose
-        println("Found $(length(oneAPIDevices)) oneAPI devices")
-    end
+    verbose && @info "Found $(length(oneAPIDevices)) oneAPI devices"
     for device in oneAPIDevices
         push!(devices, oneAPIGPU(device, default_strategy(oneAPIGPU), -1))
     end
ext/devices/rocm/function.jl (new file, 31 additions)

@@ -0,0 +1,31 @@
+function ComputableDAGs.kernel(::Type{ROCmGPU}, graph::DAG, instance)
+    machine = cpu_st()
+    tape = ComputableDAGs.gen_tape(graph, instance, machine, context_module)
+
+    init_caches = Expr(:block, tape.initCachesCode...)
+    assign_inputs = Expr(:block, ComputableDAGs.expr_from_fc.(tape.inputAssignCode)...)
+    code = Expr(:block, ComputableDAGs.expr_from_fc.(tape.computeCode)...)
+
+    function_id = ComputableDAGs.to_var_name(UUIDs.uuid1(ComputableDAGs.rng[1]))
+    res_sym = eval(
+        ComputableDAGs.gen_access_expr(
+            ComputableDAGs.entry_device(tape.machine), tape.outputSymbol
+        ),
+    )
+    expr = Meta.parse(
+        "function compute_$(function_id)(input_vector, output_vector, n::Int64)
+            id = (workgroupIdx().x - 1) * workgroupDim().x + workgroupIdx().x
+            if (id > n)
+                return
+            end
+            @inline data_input = input_vector[id]
+            $(init_caches)
+            $(assign_inputs)
+            $code
+            @inline output_vector[id] = $res_sym
+            return nothing
+        end"
+    )
+
+    return expr
+end
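As with the CUDA variant, the returned `Expr` must be evaluated by the caller. A hedged sketch of a possible launch, mirroring the CUDA example above (`graph`, `instance`, `input_vector`, and `ResultType` are assumptions; note that `@roc` launch-parameter semantics have varied across AMDGPU.jl versions):

```julia
using ComputableDAGs, AMDGPU

kernel_expr = kernel(ROCmGPU, graph, instance)  # returns an Expr
kernel_f = eval(kernel_expr)

n = length(input_vector)
roc_in = ROCArray(input_vector)
roc_out = ROCArray{ResultType}(undef, n)        # ResultType is an assumption

# 256 work items per group; check gridsize semantics for your AMDGPU.jl version
@roc groupsize = 256 gridsize = cld(n, 256) kernel_f(roc_in, roc_out, n)
```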
ext/devices/rocm/impl.jl (5 additions, 28 deletions)

@@ -1,26 +1,7 @@
-using AMDGPU
-
-"""
-    ROCmGPU <: AbstractGPU
-
-Representation of a specific AMD GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
-"""
-mutable struct ROCmGPU <: ComputableDAGs.AbstractGPU
-    device::Any
-    cacheStrategy::CacheStrategy
-    FLOPS::Float64
-end
-
-push!(ComputableDAGs.DEVICE_TYPES, ROCmGPU)
-
-ComputableDAGs.CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()]
-
 ComputableDAGs.default_strategy(::Type{ROCmGPU}) = LocalVariables()

 function ComputableDAGs.measure_device!(device::ROCmGPU; verbose::Bool)
-    if verbose
-        println("Measuring ROCm GPU $(device.device)")
-    end
+    verbose && @info "Measuring ROCm GPU $(device.device)"

     # TODO implement
     return nothing
@@ -31,20 +12,16 @@ end

 Return a Vector of [`ROCmGPU`](@ref)s available on the current machine. If `verbose` is true, print some additional information.
 """
-function get_devices(::Type{ROCmGPU}; verbose::Bool=false)
-    devices = Vector{AbstractDevice}()
+function ComputableDAGs.get_devices(::Type{ROCmGPU}; verbose::Bool=false)
+    devices = Vector{ComputableDAGs.AbstractDevice}()

     if !AMDGPU.functional()
-        if verbose
-            println("AMDGPU is non-functional")
-        end
+        @warn "The AMDGPU extension is loaded but AMDGPU.jl is non-functional"
         return devices
     end

     AMDDevices = AMDGPU.devices()
-    if verbose
-        println("Found $(length(AMDDevices)) AMD devices")
-    end
+    verbose && @info "Found $(length(AMDDevices)) AMD devices"
     for device in AMDDevices
         push!(devices, ROCmGPU(device, default_strategy(ROCmGPU), -1))
     end
ext/oneAPIExt.jl (12 additions, 1 deletion)

@@ -1,6 +1,17 @@
 module oneAPIExt

-using ComputableDAGs, oneAPI
+using ComputableDAGs
+using UUIDs
+using oneAPI
+
+function __init__()
+    @debug "Loading oneAPIExt"
+
+    push!(ComputableDAGs.DEVICE_TYPES, oneAPIGPU)
+    ComputableDAGs.CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()]
+
+    return nothing
+end

 # include specialized oneAPI functions here
 include("devices/oneapi/impl.jl")
src/ComputableDAGs.jl (3 additions, 2 deletions)

@@ -60,8 +60,8 @@ export get_machine_info, cpu_st
 export CacheStrategy, default_strategy
 export LocalVariables, Dictionary

-# CUDAExt
-export cuda_kernel
+# GPU Extensions
+export kernel, CUDAGPU, ROCmGPU, oneAPIGPU

 include("devices/interface.jl")
 include("task/type.jl")
@@ -124,6 +124,7 @@ include("devices/detect.jl")
 include("devices/impl.jl")

 include("devices/numa/impl.jl")
+include("devices/ext.jl")

 include("scheduler/interface.jl")
 include("scheduler/greedy.jl")
src/devices/ext.jl (new file, 44 additions)

@@ -0,0 +1,44 @@
+# file for struct definitions used by the extensions
+# since extensions can't export names themselves
+
+"""
+    CUDAGPU <: AbstractGPU
+
+Representation of a specific CUDA GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
+
+!!! note
+    This requires CUDA to be loaded to be useful.
+"""
+mutable struct CUDAGPU <: AbstractGPU
+    device::Any # CuDevice
+    cacheStrategy::CacheStrategy
+    FLOPS::Float64
+end
+
+"""
+    oneAPIGPU <: AbstractGPU
+
+Representation of a specific Intel GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
+
+!!! note
+    This requires oneAPI to be loaded to be useful.
+"""
+mutable struct oneAPIGPU <: AbstractGPU
+    device::Any # oneAPI.oneL0.ZeDevice
+    cacheStrategy::CacheStrategy
+    FLOPS::Float64
+end
+
+"""
+    ROCmGPU <: AbstractGPU
+
+Representation of a specific AMD GPU that code can run on. Implements the [`AbstractDevice`](@ref) interface.
+
+!!! note
+    This requires AMDGPU to be loaded to be useful.
+"""
+mutable struct ROCmGPU <: AbstractGPU
+    device::Any # HIPDevice
+    cacheStrategy::CacheStrategy
+    FLOPS::Float64
+end
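Moving these structs into the parent package is the standard workaround for package extensions being unable to export names. A minimal, self-contained sketch of the pattern; every name here is illustrative, not ComputableDAGs API:

```julia
# Illustrative only: the parent module owns the type and the generic function,
# while an "extension" module (here just a nested module) adds the method.
module Parent
abstract type AbstractGPU end

struct SomeGPU <: AbstractGPU end  # defined in the parent so it can be exported

function get_devices end           # zero-method function; extensions add methods

export SomeGPU, get_devices
end # module Parent

module SomeBackendExt              # stands in for a real ext/SomeBackendExt.jl
using ..Parent

# Adding a method to the *parent's* function makes it reachable through the
# parent's export, even though this module exports nothing itself.
Parent.get_devices(::Type{Parent.SomeGPU}) = Parent.SomeGPU[Parent.SomeGPU()]
end # module SomeBackendExt

using .Parent
get_devices(SomeGPU)  # dispatches once the "extension" is loaded
```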
src/devices/impl.jl (1 addition, 10 deletions)

@@ -59,15 +59,6 @@ It is the simplest machine definition possible and produces a simple function wh
 """
 function cpu_st()
     return Machine(
-        [
-            ComputableDAGs.NumaNode(
-                0,
-                1,
-                ComputableDAGs.default_strategy(ComputableDAGs.NumaNode),
-                -1.0,
-                UUIDs.uuid1(),
-            ),
-        ],
-        [-1.0;;],
+        [NumaNode(0, 1, default_strategy(NumaNode), -1.0, UUIDs.uuid1())], [-1.0;;]
    )
 end
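For reference, `cpu_st` builds the single-threaded fallback `Machine` that the kernel generators above now call internally; a minimal usage sketch:

```julia
using ComputableDAGs

machine = cpu_st()  # one NUMA node, one thread, placeholder FLOPS of -1.0
```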