1 change: 1 addition & 0 deletions .buildkite/pipeline.yml
@@ -138,6 +138,7 @@ steps:
 
     println("--- :julia: Instantiating project")
     Pkg.develop([PackageSpec(path=pwd())])
+    Pkg.add(url="https://github.com/JuliaGPU/AcceleratedKernels.jl", rev="main")
 
     println("+++ :julia: Benchmarking")
     include("perf/runbenchmarks.jl")'
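For local reproduction, the same setup can be run from a Metal.jl checkout; a rough sketch (the `perf` project layout and the activation step are assumptions, not taken from the pipeline):

    # Hypothetical local equivalent of the CI benchmark step above
    using Pkg
    Pkg.activate("perf")                  # assumed: benchmarks live in their own environment
    Pkg.develop(PackageSpec(path=pwd()))  # this Metal.jl checkout
    Pkg.add(url="https://github.com/JuliaGPU/AcceleratedKernels.jl", rev="main")
    include("perf/runbenchmarks.jl")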
2 changes: 2 additions & 0 deletions Project.toml
@@ -3,6 +3,7 @@ uuid = "dde4c033-4e86-420c-a63e-0dd931031962"
 version = "1.6.2"
 
 [deps]
+AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
 CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
@@ -32,6 +33,7 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 SpecialFunctionsExt = "SpecialFunctions"
 
 [compat]
+AcceleratedKernels = "0.4"
 Adapt = "4"
 BFloat16s = "0.5"
 CEnum = "0.4, 0.5"
38 changes: 31 additions & 7 deletions perf/array.jl
@@ -10,7 +10,7 @@ for (S, smname) in [(Metal.PrivateStorage,"private"), (Metal.SharedStorage,"shar
     gpu_vec = reshape(gpu_mat, length(gpu_mat))
     gpu_arr_3d = reshape(gpu_mat, (m, 40, 25))
     gpu_arr_4d = reshape(gpu_mat, (m, 10, 10, 10))
-    gpu_mat_ints = MtlMatrix{Int,S}(rand(rng, Int, m, n))
+    gpu_mat_ints = MtlMatrix{Int,S}(rand(rng, -10:10, m, n))
     gpu_vec_ints = reshape(gpu_mat_ints, length(gpu_mat_ints))
     gpu_mat_bools = MtlMatrix{Bool,S}(rand(rng, Bool, m, n))
     gpu_vec_bools = reshape(gpu_mat_bools, length(gpu_mat_bools))
@@ -58,19 +58,43 @@ for (S, smname) in [(Metal.PrivateStorage,"private"), (Metal.SharedStorage,"shar
 
     # no need to test inplace version, which performs the same operation (but with an alloc)
     let group = addgroup!(group, "accumulate")
-        group["1d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_vec)
-        group["2d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat; dims=1)
+        let group = addgroup!(group, "Float32")
+            group["1d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_vec)
+            group["dims=1"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat; dims=1)
+            group["dims=2"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat; dims=2)
+        end
+        let group = addgroup!(group, "Int64")
+            group["1d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_vec_ints)
+            group["dims=1"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat_ints; dims=1)
+            group["dims=2"] = @benchmarkable Metal.@sync accumulate(+, $gpu_mat_ints; dims=2)
+        end
     end
 
     let group = addgroup!(group, "reductions")
        let group = addgroup!(group, "reduce")
-            group["1d"] = @benchmarkable Metal.@sync reduce(+, $gpu_vec)
-            group["2d"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat; dims=1)
+            let group = addgroup!(group, "Float32")
+                group["1d"] = @benchmarkable Metal.@sync reduce(+, $gpu_vec)
+                group["dims=1"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat; dims=1)
+                group["dims=2"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat; dims=2)
+            end
+            let group = addgroup!(group, "Int64")
+                group["1d"] = @benchmarkable Metal.@sync reduce(+, $gpu_vec_ints)
+                group["dims=1"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat_ints; dims=1)
+                group["dims=2"] = @benchmarkable Metal.@sync reduce(+, $gpu_mat_ints; dims=2)
+            end
        end
 
        let group = addgroup!(group, "mapreduce")
-            group["1d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_vec)
-            group["2d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat; dims=1)
+            let group = addgroup!(group, "Float32")
+                group["1d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_vec)
+                group["dims=1"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat; dims=1)
+                group["dims=2"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat; dims=2)
+            end
+            let group = addgroup!(group, "Int64")
+                group["1d"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_vec_ints)
+                group["dims=1"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat_ints; dims=1)
+                group["dims=2"] = @benchmarkable Metal.@sync mapreduce(x->x+1, +, $gpu_mat_ints; dims=2)
+            end
        end
 
        # used by sum, prod, minimum, maximum, all, any, count
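These nested groups key each benchmark by scalar type and reduction shape. A minimal sketch of how such a BenchmarkTools.jl suite is assembled and queried (group names and array sizes here are illustrative only, not those used by runbenchmarks.jl):

    # Standalone sketch of the nested-group pattern used above (sizes made up)
    using BenchmarkTools, Metal

    suite = BenchmarkGroup()
    array_group = addgroup!(suite, "private array")
    acc = addgroup!(array_group, "accumulate")
    f32 = addgroup!(acc, "Float32")

    gpu_vec = MtlArray(rand(Float32, 1_000_000))
    f32["1d"] = @benchmarkable Metal.@sync accumulate(+, $gpu_vec)

    results = run(suite)
    # timings live at results["private array"]["accumulate"]["Float32"]["1d"]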
1 change: 1 addition & 0 deletions src/Metal.jl
@@ -12,6 +12,7 @@ using ExprTools: splitdef, combinedef
 using ObjectiveC, .CoreFoundation, .Foundation, .Dispatch, .OS
 import ObjectiveC: is_macos, darwin_version, macos_version
 import KernelAbstractions
+import AcceleratedKernels as AK
 using ScopedValues
 
 include("version.jl")
19 changes: 10 additions & 9 deletions src/accumulate.jl
@@ -170,33 +170,34 @@ end
 ## Base interface
 
 Base._accumulate!(op, output::WrappedMtlArray, input::WrappedMtlVector, dims::Nothing, init::Nothing) =
-    scan!(op, output, input; dims=1)
+    @inline AK.accumulate!(op, output, input, MetalBackend(); dims, init=AK.neutral_element(op, eltype(output)))
 
 Base._accumulate!(op, output::WrappedMtlArray, input::WrappedMtlArray, dims::Integer, init::Nothing) =
-    scan!(op, output, input; dims=dims)
+    @inline AK.accumulate!(op, output, input, MetalBackend(); dims, init=AK.neutral_element(op, eltype(output)))
 
 Base._accumulate!(op, output::WrappedMtlArray, input::MtlVector, dims::Nothing, init::Some) =
-    scan!(op, output, input; dims=1, init=init)
+    @inline AK.accumulate!(op, output, input, MetalBackend(); dims, init=something(init))
 
 Base._accumulate!(op, output::WrappedMtlArray, input::WrappedMtlArray, dims::Integer, init::Some) =
-    scan!(op, output, input; dims=dims, init=init)
+    @inline AK.accumulate!(op, output, input, MetalBackend(); dims, init=something(init))
 
-Base.accumulate_pairwise!(op, result::WrappedMtlVector, v::WrappedMtlVector) = accumulate!(op, result, v)
+Base.accumulate_pairwise!(op, result::WrappedMtlVector, v::WrappedMtlVector) = @inline AK.accumulate!(op, result, v, MetalBackend(); init=AK.neutral_element(op, eltype(result)))
 
 # default behavior unless dims are specified by the user
 function Base.accumulate(op, A::WrappedMtlArray;
                          dims::Union{Nothing,Integer}=nothing, kw...)
+    nt = values(kw)
     if dims === nothing && !(A isa AbstractVector)
         # This branch takes care of the cases not handled by `_accumulate!`.
-        return reshape(accumulate(op, A[:]; kw...), size(A))
+        return reshape(AK.accumulate(op, A[:], MetalBackend(); init = (:init in keys(kw) ? nt.init : AK.neutral_element(op, eltype(A)))), size(A))
     end
-    nt = values(kw)
     if isempty(kw)
         out = similar(A, Base.promote_op(op, eltype(A), eltype(A)))
+        init = AK.neutral_element(op, eltype(out))
     elseif keys(nt) === (:init,)
         out = similar(A, Base.promote_op(op, typeof(nt.init), eltype(A)))
+        init = nt.init
     else
         throw(ArgumentError("accumulate does not support the keyword arguments $(setdiff(keys(nt), (:init,)))"))
     end
-    accumulate!(op, out, A; dims=dims, kw...)
+    AK.accumulate!(op, out, A, MetalBackend(); dims, init)
 end
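With this, every scan entry point funnels into AcceleratedKernels. A hedged sketch of the user-facing behaviour, assuming Base's accumulate semantics carry over unchanged (requires Metal-capable hardware):

    # Illustrative calls exercising the new dispatch
    using Metal
    A = MtlArray(Float32[1 2; 3 4])
    accumulate(+, A; dims=2)           # row-wise scan: Float32[1 3; 3 7]
    accumulate(+, A[:, 1])             # vector scan:   Float32[1, 4]
    accumulate(+, A[:, 1]; init=10f0)  # explicit init: Float32[11, 14]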
27 changes: 27 additions & 0 deletions src/mapreduce.jl
@@ -142,6 +142,33 @@ end
 
 ## COV_EXCL_STOP
 
+Base.mapreduce(f, op, A::WrappedMtlArray;
+               dims=:, init=nothing) = _mapreduce(f, op, A, init, dims)
+               # dims=:, init=nothing) = AK.mapreduce(f, op, A, init, dims=dims isa Colon ? nothing : dims)
+Base.mapreduce(f, op, A::Broadcast.Broadcasted{<:MtlArrayStyle};
+               dims=:, init=nothing) = _mapreduce(f, op, A, init, dims)
+               # dims=:, init=nothing) = AK.mapreduce(f, op, A, init, dims=dims isa Colon ? nothing : dims)
+
+# "Borrowed" from GPUArrays
+@inline function _init_value(f, op, init, As...)
+    if init === nothing
+        ET = Broadcast.combine_eltypes(f, As)
+        ET = Base.promote_op(op, ET, ET)
+        (ET === Union{} || ET === Any) &&
+            error("mapreduce cannot figure the output element type, please pass an explicit init value")
+
+        init = AK.neutral_element(op, ET)
+    end
+    return init
+end
+
+function _mapreduce(f, op, A, init, dims::Union{Nothing, Integer})
+    init_val = _init_value(f, op, init, A)
+    AK.mapreduce(f, op, A; init=init_val, neutral=init_val, dims)
+end
+_mapreduce(f, op, A, init, ::Colon) = _mapreduce(f, op, A, init, nothing)
+_mapreduce(f, op, A, init, dims) = GPUArrays._mapreduce(f, op, A; dims, init)
+
 function GPUArrays.mapreducedim!(f::F, op::OP, R::WrappedMtlArray{T},
                                  A::Union{AbstractArray,Broadcast.Broadcasted};
                                  init=nothing) where {F, OP, T}
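When no `init` is supplied, `_init_value` infers the output element type and falls back to a neutral element for `op`. A small worked sketch; the specific `neutral_element` values are assumptions based on the usual identities:

    # Assumed neutral elements (additive/multiplicative identities)
    import AcceleratedKernels as AK
    AK.neutral_element(+, Float32)  # presumably 0.0f0
    AK.neutral_element(*, Float32)  # presumably 1.0f0

    using Metal
    A = MtlArray(Float32[1, 2, 3])
    mapreduce(x -> x + 1, +, A)              # (1+1) + (2+1) + (3+1) = 9.0f0
    mapreduce(x -> x + 1, +, A; init=100f0)  # 109.0f0: explicit init skips the fallback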
2 changes: 2 additions & 0 deletions test/Project.toml
@@ -1,4 +1,5 @@
 [deps]
+AcceleratedKernels = "6a4ca0a5-0e36-4168-a932-d9be78d558f1"
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
@@ -11,6 +12,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 ObjectiveC = "e86c9b32-1129-44ac-8ea0-90d5bb39ded9"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
2 changes: 2 additions & 0 deletions test/runtests.jl
@@ -1,3 +1,5 @@
+using Pkg
+Pkg.develop("AcceleratedKernels")
 using Distributed
 using Dates
 using Metal