From d980091ebc766685b80bb5133401c70ffb2d9b24 Mon Sep 17 00:00:00 2001 From: AntonReinhard Date: Fri, 1 Nov 2024 13:34:03 +0100 Subject: [PATCH] Remove occurrences of CacheStrategy everywhere --- ext/AMDGPUExt.jl | 1 - ext/CUDAExt.jl | 1 - ext/devices/cuda/function.jl | 6 +--- ext/devices/cuda/impl.jl | 4 +-- ext/devices/oneapi/impl.jl | 4 +-- ext/devices/rocm/function.jl | 6 +--- ext/devices/rocm/impl.jl | 4 +-- ext/oneAPIExt.jl | 1 - src/ComputableDAGs.jl | 2 -- src/code_gen/function.jl | 11 ++----- src/code_gen/tape_machine.jl | 42 +++--------------------- src/code_gen/type.jl | 2 -- src/devices/ext.jl | 3 -- src/devices/impl.jl | 41 ++--------------------- src/devices/interface.jl | 63 +++--------------------------------- src/devices/numa/impl.jl | 55 +++---------------------------- 16 files changed, 24 insertions(+), 222 deletions(-) diff --git a/ext/AMDGPUExt.jl b/ext/AMDGPUExt.jl index 6572658..c4bf53e 100644 --- a/ext/AMDGPUExt.jl +++ b/ext/AMDGPUExt.jl @@ -8,7 +8,6 @@ function __init__() @debug "Loading AMDGPUExt" push!(ComputableDAGs.DEVICE_TYPES, ROCmGPU) - ComputableDAGs.CACHE_STRATEGIES[ROCmGPU] = [LocalVariables()] return nothing end diff --git a/ext/CUDAExt.jl b/ext/CUDAExt.jl index 0797b10..0b28203 100644 --- a/ext/CUDAExt.jl +++ b/ext/CUDAExt.jl @@ -8,7 +8,6 @@ function __init__() @debug "Loading CUDAExt" push!(ComputableDAGs.DEVICE_TYPES, CUDAGPU) - ComputableDAGs.CACHE_STRATEGIES[CUDAGPU] = [LocalVariables()] return nothing end diff --git a/ext/devices/cuda/function.jl b/ext/devices/cuda/function.jl index 7f0f889..7ccdf94 100644 --- a/ext/devices/cuda/function.jl +++ b/ext/devices/cuda/function.jl @@ -4,7 +4,6 @@ function ComputableDAGs.kernel( machine = cpu_st() tape = ComputableDAGs.gen_tape(graph, instance, machine, context_module) - init_caches = Expr(:block, tape.initCachesCode...) assign_inputs = Expr(:block, ComputableDAGs.expr_from_fc.(tape.inputAssignCode)...) # TODO: use gen_function_body here code = Expr(:block, ComputableDAGs.expr_from_fc.(tape.schedule)...) @@ -12,9 +11,7 @@ function ComputableDAGs.kernel( function_id = ComputableDAGs.to_var_name(UUIDs.uuid1(ComputableDAGs.rng[1])) res_sym = eval( ComputableDAGs._gen_access_expr( - ComputableDAGs.entry_device(tape.machine), - ComputableDAGs.entry_device(tape.machine).cacheStrategy, - tape.outputSymbol, + ComputableDAGs.entry_device(tape.machine), tape.outputSymbol ), ) expr = Meta.parse( @@ -24,7 +21,6 @@ function ComputableDAGs.kernel( return end @inline data_input = input_vector[id] - $(init_caches) $(assign_inputs) $code @inline output_vector[id] = $res_sym diff --git a/ext/devices/cuda/impl.jl b/ext/devices/cuda/impl.jl index 77b1e3f..c05d126 100644 --- a/ext/devices/cuda/impl.jl +++ b/ext/devices/cuda/impl.jl @@ -1,5 +1,3 @@ -ComputableDAGs.default_strategy(::Type{CUDAGPU}) = LocalVariables() - function ComputableDAGs.measure_device!(device::CUDAGPU; verbose::Bool) verbose && @info "Measuring CUDA GPU $(device.device)" @@ -23,7 +21,7 @@ function ComputableDAGs.get_devices(::Type{CUDAGPU}; verbose::Bool=false) CUDADevices = CUDA.devices() verbose && @info "Found $(length(CUDADevices)) CUDA devices" for device in CUDADevices - push!(devices, CUDAGPU(device, default_strategy(CUDAGPU), -1)) + push!(devices, CUDAGPU(device, -1)) end return devices diff --git a/ext/devices/oneapi/impl.jl b/ext/devices/oneapi/impl.jl index 61c44dd..e5bce0b 100644 --- a/ext/devices/oneapi/impl.jl +++ b/ext/devices/oneapi/impl.jl @@ -1,5 +1,3 @@ -ComputableDAGs.default_strategy(::Type{oneAPIGPU}) = LocalVariables() - function ComputableDAGs.measure_device!(device::oneAPIGPU; verbose::Bool) verbose && @info "Measuring oneAPI GPU $(device.device)" @@ -23,7 +21,7 @@ function ComputableDAGs.get_devices(::Type{oneAPIGPU}; verbose::Bool=false) oneAPIDevices = oneAPI.devices() verbose && @info "Found $(length(oneAPIDevices)) oneAPI devices" for device in oneAPIDevices - push!(devices, oneAPIGPU(device, default_strategy(oneAPIGPU), -1)) + push!(devices, oneAPIGPU(device, -1)) end return devices diff --git a/ext/devices/rocm/function.jl b/ext/devices/rocm/function.jl index 64cfcfe..dc617e2 100644 --- a/ext/devices/rocm/function.jl +++ b/ext/devices/rocm/function.jl @@ -4,7 +4,6 @@ function ComputableDAGs.kernel( machine = cpu_st() tape = ComputableDAGs.gen_tape(graph, instance, machine, context_module) - init_caches = Expr(:block, tape.initCachesCode...) assign_inputs = Expr(:block, ComputableDAGs.expr_from_fc.(tape.inputAssignCode)...) # TODO use gen_function_body here @@ -13,9 +12,7 @@ function ComputableDAGs.kernel( function_id = ComputableDAGs.to_var_name(UUIDs.uuid1(ComputableDAGs.rng[1])) res_sym = eval( ComputableDAGs._gen_access_expr( - ComputableDAGs.entry_device(tape.machine), - ComputableDAGs.entry_device(tape.machine).cacheStrategy, - tape.outputSymbol, + ComputableDAGs.entry_device(tape.machine), tape.outputSymbol ), ) expr = Meta.parse( @@ -25,7 +22,6 @@ function ComputableDAGs.kernel( return end @inline data_input = input_vector[id] - $(init_caches) $(assign_inputs) $code @inline output_vector[id] = $res_sym diff --git a/ext/devices/rocm/impl.jl b/ext/devices/rocm/impl.jl index 18ea34f..0352540 100644 --- a/ext/devices/rocm/impl.jl +++ b/ext/devices/rocm/impl.jl @@ -1,5 +1,3 @@ -ComputableDAGs.default_strategy(::Type{ROCmGPU}) = LocalVariables() - function ComputableDAGs.measure_device!(device::ROCmGPU; verbose::Bool) verbose && @info "Measuring ROCm GPU $(device.device)" @@ -23,7 +21,7 @@ function ComputableDAGs.get_devices(::Type{ROCmGPU}; verbose::Bool=false) AMDDevices = AMDGPU.devices() verbose && @info "Found $(length(AMDDevices)) AMD devices" for device in AMDDevices - push!(devices, ROCmGPU(device, default_strategy(ROCmGPU), -1)) + push!(devices, ROCmGPU(device, -1)) end return devices diff --git a/ext/oneAPIExt.jl b/ext/oneAPIExt.jl index c638942..240eb53 100644 --- a/ext/oneAPIExt.jl +++ b/ext/oneAPIExt.jl @@ -8,7 +8,6 @@ function __init__() @debug "Loading oneAPIExt" push!(ComputableDAGs.DEVICE_TYPES, oneAPIGPU) - ComputableDAGs.CACHE_STRATEGIES[oneAPIGPU] = [LocalVariables()] return nothing end diff --git a/src/ComputableDAGs.jl b/src/ComputableDAGs.jl index 291fc4f..a852e15 100644 --- a/src/ComputableDAGs.jl +++ b/src/ComputableDAGs.jl @@ -57,8 +57,6 @@ export problem_instance, input_type, graph, input_expr export Machine export NumaNode export get_machine_info, cpu_st -export CacheStrategy, default_strategy -export LocalVariables, Dictionary # GPU Extensions export kernel, CUDAGPU, ROCmGPU, oneAPIGPU diff --git a/src/code_gen/function.jl b/src/code_gen/function.jl index f7afbf7..f8dbce0 100644 --- a/src/code_gen/function.jl +++ b/src/code_gen/function.jl @@ -24,18 +24,11 @@ function get_compute_function( ) tape = gen_tape(graph, instance, machine, context_module) - initCaches = Expr(:block, tape.initCachesCode...) assignInputs = Expr(:block, expr_from_fc.(tape.inputAssignCode)...) code = gen_function_body(tape; closures_size=closures_size) functionId = to_var_name(UUIDs.uuid1(rng[1])) - resSym = eval( - _gen_access_expr( - entry_device(tape.machine), - entry_device(tape.machine).cacheStrategy, - tape.outputSymbol, - ), - ) + resSym = eval(_gen_access_expr(entry_device(tape.machine), tape.outputSymbol)) expr = # Expr( :function, # function definition @@ -44,7 +37,7 @@ function get_compute_function( Symbol("compute_$functionId"), Expr(:(::), :data_input, input_type(instance)), ), # function name and parameters - Expr(:block, initCaches, assignInputs, code, Expr(:return, resSym)), # function body + Expr(:block, assignInputs, code, Expr(:return, resSym)), # function body ) return RuntimeGeneratedFunction(@__MODULE__, context_module, expr) diff --git a/src/code_gen/tape_machine.jl b/src/code_gen/tape_machine.jl index 78eabbe..7bdc584 100644 --- a/src/code_gen/tape_machine.jl +++ b/src/code_gen/tape_machine.jl @@ -52,11 +52,7 @@ end function expr_from_fc(fc::FunctionCall{VectorT,0}) where {VectorT} func_call = Expr( - :call, - fc.func, - eval.( - _gen_access_expr.(Ref(fc.device), Ref(fc.device.cacheStrategy), fc.arguments) - )..., + :call, fc.func, eval.(_gen_access_expr.(Ref(fc.device), fc.arguments))... ) access_expr = eval(gen_access_expr(fc)) @@ -73,30 +69,13 @@ function expr_from_fc(fc::FunctionCall{VectorT,M}) where {VectorT,M} :call, fc.func, fc.value_arguments..., - eval.( - _gen_access_expr.(Ref(fc.device), Ref(fc.device.cacheStrategy), fc.arguments) - )..., + eval.(_gen_access_expr.(Ref(fc.device), fc.arguments))..., ) access_expr = eval(gen_access_expr(fc)) return Expr(:(=), access_expr, func_call) end -""" - gen_cache_init_code(machine::Machine) - -For each [`AbstractDevice`](@ref) in the given [`Machine`](@ref), returning a `Vector{Expr}` doing the initialization. -""" -function gen_cache_init_code(machine::Machine) - initialize_caches = Vector{Expr}() - - for device in machine.devices - push!(initialize_caches, gen_cache_init_code(device)) - end - - return initialize_caches -end - """ gen_input_assignment_code( input_symbols::Dict{String, Vector{Symbol}}, @@ -176,7 +155,7 @@ function gen_function_body(tape::Tape; closures_size::Int) ret_symbols_set = Set(return_symbols) for fc in code_block for arg in fc.arguments - symbol = eval(_gen_access_expr(fc.device, fc.device.cacheStrategy, arg)) + symbol = eval(_gen_access_expr(fc.device, arg)) # symbol won't be defined if it is first calculated in the closure # so don't add it to the arguments in this case @@ -255,18 +234,10 @@ function gen_tape( # get outSymbol outSym = Symbol(to_var_name(get_exit_node(graph).id)) - init_caches = gen_cache_init_code(machine) assign_inputs = gen_input_assignment_code(input_syms, instance, machine, context_module) return Tape{input_type(instance)}( - init_caches, - assign_inputs, - function_body, - input_syms, - outSym, - Dict(), - instance, - machine, + assign_inputs, function_body, input_syms, outSym, instance, machine ) end @@ -275,8 +246,6 @@ end Execute the given tape with the given input. -For implementation reasons, this disregards the set [`CacheStrategy`](@ref) of the devices and always uses a dictionary. - !!! warning This is very slow and might not work. This is to be majorly revamped. """ @@ -285,9 +254,6 @@ function execute_tape(tape::Tape, input) cache[:input] = input # simply execute all the code snippets here @assert typeof(input) <: input_type(tape.instance) "expected tape input type to fit $(input_type(tape.instance)) but got $(typeof(input))" - for expr in tape.initCachesCode - @eval $expr - end compute_code = tape.schedule diff --git a/src/code_gen/type.jl b/src/code_gen/type.jl index 06405b9..d08bf46 100644 --- a/src/code_gen/type.jl +++ b/src/code_gen/type.jl @@ -10,12 +10,10 @@ TODO: update docs - `outputSymbol::Symbol`: The symbol of the final calculated value """ struct Tape{INPUT} - initCachesCode::Vector{Expr} inputAssignCode::Vector{FunctionCall} schedule::Vector{FunctionCall} inputSymbols::Dict{String,Vector{Symbol}} outputSymbol::Symbol - cache::Dict{Symbol,Any} instance::Any machine::Machine end diff --git a/src/devices/ext.jl b/src/devices/ext.jl index b346465..4a60e4d 100644 --- a/src/devices/ext.jl +++ b/src/devices/ext.jl @@ -11,7 +11,6 @@ Representation of a specific CUDA GPU that code can run on. Implements the [`Abs """ mutable struct CUDAGPU <: AbstractGPU device::Any # CuDevice - cacheStrategy::CacheStrategy FLOPS::Float64 end @@ -25,7 +24,6 @@ Representation of a specific Intel GPU that code can run on. Implements the [`Ab """ mutable struct oneAPIGPU <: AbstractGPU device::Any # oneAPI.oneL0.ZeDevice - cacheStrategy::CacheStrategy FLOPS::Float64 end @@ -39,6 +37,5 @@ Representation of a specific AMD GPU that code can run on. Implements the [`Abst """ mutable struct ROCmGPU <: AbstractGPU device::Any # HIPDevice - cacheStrategy::CacheStrategy FLOPS::Float64 end diff --git a/src/devices/impl.jl b/src/devices/impl.jl index 2f84121..05c0049 100644 --- a/src/devices/impl.jl +++ b/src/devices/impl.jl @@ -18,39 +18,6 @@ function entry_device(machine::Machine) return machine.devices[1] end -""" - strategies(t::Type{T}) where {T <: AbstractDevice} - -Return a vector of available [`CacheStrategy`](@ref)s for the given [`AbstractDevice`](@ref). -The caching strategies are used in code generation. -""" -function strategies(t::Type{T}) where {T<:AbstractDevice} - if !haskey(CACHE_STRATEGIES, t) - error("Trying to get strategies for $T, but it has no strategies defined!") - end - - return CACHE_STRATEGIES[t] -end - -""" - cache_strategy(device::AbstractDevice) - -Returns the cache strategy set for this device. -""" -function cache_strategy(device::AbstractDevice) - return device.cacheStrategy -end - -""" - set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy) - -Sets the device's cache strategy. After this call, [`cache_strategy`](@ref) should return `cacheStrategy` on the given device. -""" -function set_cache_strategy(device::AbstractDevice, cacheStrategy::CacheStrategy) - device.cacheStrategy = cacheStrategy - return nothing -end - """ cpu_st() @@ -58,9 +25,7 @@ A function returning a [`Machine`](@ref) that only has a single thread of one CP It is the simplest machine definition possible and produces a simple function when used with [`get_compute_function`](@ref). """ function cpu_st() - return Machine( - [NumaNode(0, 1, default_strategy(NumaNode), -1.0, UUIDs.uuid1())], [-1.0;;] - ) + return Machine([NumaNode(0, 1, -1.0, UUIDs.uuid1())], [-1.0;;]) end """ @@ -69,7 +34,7 @@ end Dispatch from the given [`FunctionCall`](@ref) to the interface function `_gen_access_expr`(@ref). """ function gen_access_expr(fc::FunctionCall) - return _gen_access_expr(fc.device, fc.device.cacheStrategy, fc.return_symbol) + return _gen_access_expr(fc.device, fc.return_symbol) end """ @@ -78,5 +43,5 @@ end Dispatch from the given [`FunctionCall`](@ref) to the interface function `_gen_local_init`(@ref). """ function gen_local_init(fc::FunctionCall) - return _gen_local_init(fc, fc.device, fc.device.cacheStrategy) + return _gen_local_init(fc, fc.device) end diff --git a/src/devices/interface.jl b/src/devices/interface.jl index a4377e0..a4a08be 100644 --- a/src/devices/interface.jl +++ b/src/devices/interface.jl @@ -2,7 +2,7 @@ AbstractDevice Abstract base type for every device, like GPUs, CPUs or any other compute devices. -Every implementation needs to implement various functions and needs a member `cacheStrategy`. +Every implementation needs to implement various functions. """ abstract type AbstractDevice end @@ -23,33 +23,6 @@ struct Machine transferRates::Matrix{Float64} end -""" - CacheStrategy - -Abstract base type for caching strategies. - -See also: [`strategies`](@ref) -""" -abstract type CacheStrategy end - -""" - LocalVariables <: CacheStrategy - -A caching strategy relying solely on local variables for every input and output. - -Implements the [`CacheStrategy`](@ref) interface. -""" -struct LocalVariables <: CacheStrategy end - -""" - Dictionary <: CacheStrategy - -A caching strategy relying on a dictionary of Symbols to store every input and output. - -Implements the [`CacheStrategy`](@ref) interface. -""" -struct Dictionary <: CacheStrategy end - """ DEVICE_TYPES::Vector{Type} @@ -59,23 +32,6 @@ See also: [`device_types`](@ref), [`get_devices`](@ref) """ DEVICE_TYPES = Vector{Type}() -""" - CACHE_STRATEGIES::Dict{Type{AbstractDevice}, Symbol} - -Global dictionary of available caching strategies per device. Each implementation of [`AbstractDevice`](@ref) should add its available strategies to the dictionary. - -See also: [`strategies`](@ref) -""" -CACHE_STRATEGIES = Dict{Type,Vector{CacheStrategy}}() - -""" - default_strategy(deviceType::Type{T}) where {T <: AbstractDevice} - -Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Returns the default [`CacheStrategy`](@ref) to use on the given device type. -See also: [`cache_strategy`](@ref), [`set_cache_strategy`](@ref) -""" -function default_strategy end - """ get_devices(t::Type{T}; verbose::Bool) where {T <: AbstractDevice} @@ -91,26 +47,17 @@ Interface function that must be implemented for every subtype of [`AbstractDevic function measure_device! end """ - gen_cache_init_code(device::AbstractDevice) - -Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). Returns an `Expr` initializing this device's variable cache. - -The strategy is a symbol -""" -function gen_cache_init_code end - -""" - _gen_access_expr(device::AbstractDevice, cache_strategy::CacheStrategy, symbol::Symbol) + _gen_access_expr(device::AbstractDevice, symbol::Symbol) -Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). +Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Return an `Expr` or `QuoteNode` accessing the variable identified by [`symbol`]. """ function _gen_access_expr end """ - _gen_local_init(fc::FunctionCall, device::AbstractDevice, cache_strategy::CacheStrategy) + _gen_local_init(fc::FunctionCall, device::AbstractDevice) -Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref) and at least one [`CacheStrategy`](@ref). +Interface function that must be implemented for every subtype of [`AbstractDevice`](@ref). Return an `Expr` or `QuoteNode` that initializes the access expression returned by [`_gen_access_expr`](@ref) in the local scope. This expression may be empty. For local variables it should be `local ::`. """ diff --git a/src/devices/numa/impl.jl b/src/devices/numa/impl.jl index 0c61b75..e3a2ec8 100644 --- a/src/devices/numa/impl.jl +++ b/src/devices/numa/impl.jl @@ -8,17 +8,12 @@ Representation of a specific CPU that code can run on. Implements the [`Abstract mutable struct NumaNode <: AbstractCPU numaId::UInt16 threads::UInt16 - cacheStrategy::CacheStrategy FLOPS::Float64 id::UUID end push!(DEVICE_TYPES, NumaNode) -CACHE_STRATEGIES[NumaNode] = [LocalVariables()] - -default_strategy(::Type{T}) where {T<:NumaNode} = LocalVariables() - function measure_device!(device::NumaNode; verbose::Bool) verbose && @info "Measuring Numa Node $(device.numaId)" @@ -38,37 +33,18 @@ function get_devices(deviceType::Type{T}; verbose::Bool=false) where {T<:NumaNod verbose && @info "Found $(noNumaNodes + 1) NUMA nodes" for i in 0:noNumaNodes - push!(devices, NumaNode(i, 1, default_strategy(NumaNode), -1, UUIDs.uuid1(rng[1]))) + push!(devices, NumaNode(i, 1, -1, UUIDs.uuid1(rng[1]))) end return devices end """ - gen_cache_init_code(device::NumaNode) - -Generate code for initializing the [`LocalVariables`](@ref) strategy on a [`NumaNode`](@ref). -""" -function gen_cache_init_code(device::NumaNode) - if typeof(device.cacheStrategy) <: LocalVariables - # don't need to initialize anything - return Expr(:block) - elseif typeof(device.cacheStrategy) <: Dictionary - return Meta.parse("cache_$(to_var_name(device.id)) = Dict{Symbol, Any}()") - # TODO: sizehint? - end - - return error( - "Unimplemented cache strategy \"$(device.cacheStrategy)\" for device \"$(device)\"" - ) -end - -""" - _gen_access_expr(device::NumaNode, ::LocalVariables, symbol::Symbol) + _gen_access_expr(device::NumaNode, symbol::Symbol) Interface implementation, dispatched to from [`gen_access_expr`](@ref). """ -function _gen_access_expr(::NumaNode, ::LocalVariables, symbol::Symbol) +function _gen_access_expr(::NumaNode, symbol::Symbol) # TODO rewrite these with Expr instead of quote node s = Symbol("data_$symbol") quote_node = Meta.parse(":($s)") @@ -76,33 +52,12 @@ function _gen_access_expr(::NumaNode, ::LocalVariables, symbol::Symbol) end """ - _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol) - -Interface implementation, dispatched to from [`gen_access_expr`](@ref). -""" -function _gen_access_expr(device::NumaNode, ::Dictionary, symbol::Symbol) - # TODO rewrite these with Expr instead of quote node - access_str = ":(cache_$(to_var_name(device.id))[:$symbol])" - quote_node = Meta.parse(access_str) - return quote_node -end - -""" - _gen_local_init(fc::FunctionCall, device::NumaNode, cache_strategy::LocalVariables) + _gen_local_init(fc::FunctionCall, device::NumaNode) Interface implementation, dispatched to from [`gen_local_init`](@ref). """ -function _gen_local_init(fc::FunctionCall, ::NumaNode, ::LocalVariables) +function _gen_local_init(fc::FunctionCall, ::NumaNode) s = Symbol("data_$(fc.return_symbol)") quote_node = Expr(:local, s, :(::), Symbol(fc.return_type)) # TODO: figure out how to get type info for this local variable return quote_node end - -""" - _gen_local_init(fc::FunctionCall, device::NumaNode, cache_strategy::Dictionary) - -Interface implementation, dispatched to from [`gen_local_init`](@ref). -""" -function _gen_local_init(::FunctionCall, ::NumaNode, ::Dictionary) - return Exp() -end