|
| 1 | + |
| 2 | +using Octavian, StaticArrays, LinearAlgebra, BenchmarkTools, ProgressMeter |
| 3 | + |
| 4 | +# BLAS.set_num_threads(1) |
| 5 | + |
# Optional cool-down between benchmarks, intended for laptops that thermally throttle:
# set the `JULIA_SLEEP_BENCH` environment variable to the number of seconds to sleep
# before each `@belapsed` measurement. Unset (or "0") disables the pause.
const SLEEPTIME = parse(Float64, get(ENV, "JULIA_SLEEP_BENCH", "0"))

# Pause for `SLEEPTIME` seconds; returns `true` without sleeping when it is zero.
function maybe_sleep()
    return iszero(SLEEPTIME) ? true : sleep(SLEEPTIME)
end
| 9 | +# BenchmarkTools.DEFAULT_PARAMETERS.samples = 1_000_000 |
| 10 | +# BenchmarkTools.DEFAULT_PARAMETERS.seconds = 10 |
| 11 | + |
"""
    matrix_sizes(x::Integer)
    matrix_sizes(x::NTuple{3})

Normalize a benchmark size specification into a 3-tuple, destructured by the caller
as `(M, K, N)`. A single integer is repeated three times; a 3-tuple is returned
unchanged.
"""
function matrix_sizes(x::Integer)
    return ntuple(_ -> x, Val(3))
end

function matrix_sizes(x::NTuple{3})
    return x
end
| 14 | + |
# Labels of the benchmarked multiplication variants; position `j` in this vector
# corresponds to method index `j` in `fill_bench_results!`.
const matmulmethodnames = [:SMatrix, :MMatrix, :OctavianStatic, :OctavianDynamic]

"""
    fill_bench_results!(br, lp, (M, K, N), t, i, j)

Record one measurement for size index `i` and method index `j`.

Stores the elapsed time `t` in `br[i, j, 1]` and the derived rate
`2e-9 * M * K * N / t` in `br[i, j, 2]` (GFLOPS when `t` is in seconds), then writes
`(matmulmethodnames[j], rate)` into `lp[j + 1]`. Returns `nothing`.
"""
function fill_bench_results!(br, lp, (M, K, N), t, i, j)
    label = matmulmethodnames[j]
    br[i, j, 1] = t
    # 2*M*K*N floating-point operations for an (M×K)*(K×N) product, scaled by 1e-9.
    rate = (2e-9 * M * K * N) / t
    br[i, j, 2] = rate
    # Slot 1 of `lp` is left for the caller, so method `j` lands in slot `j + 1`.
    lp[j + 1] = (label, rate)
    return nothing
end
| 24 | + |
"""
    runbenches(sr, ::Type{T} = Float64) where {T}

Benchmark small matrix products with element type `T` for every size spec in `sr`.

Each entry of `sr` is expanded by `matrix_sizes` into `(M, K, N)`. Four variants are
timed with `@belapsed`, in the order of `matmulmethodnames`:

1. `SMatrix * SMatrix`
2. `mul!` on `MMatrix` operands
3. `matmul!` on `MMatrix` operands
4. `matmul!` on plain `Array` operands

Returns an array of size `(length(sr), length(matmulmethodnames), 2)`: slice
`[:, :, 1]` holds the measured times and slice `[:, :, 2]` the rates computed by
`fill_bench_results!`. The `mul!` result and both `matmul!` results are compared
with `≈` for every size.
"""
function runbenches(sr, ::Type{T} = Float64) where {T}
    # Derive the column count from `matmulmethodnames`, which `fill_bench_results!`
    # indexes by method number, instead of repeating the literal 4.
    nmethods = length(matmulmethodnames)
    bench_results = Array{Float64}(undef, length(sr), nmethods, 2)
    p = Progress(length(sr))
    # Slot 1 carries the current size; slots 2:end carry (method name, rate) pairs.
    last_perfs = Vector{Tuple{Symbol,Union{Float64,NTuple{3,Int}}}}(undef, nmethods + 1)
    for (i, s) ∈ enumerate(sr)
        # `last_perfs` stores sizes as `NTuple{3,Int}`, so normalize other integer types.
        M, K, N = map(Int, matrix_sizes(s))
        last_perfs[1] = (:Size, (M, K, N))

        # 1. Immutable static matrices.
        Astatic = @SMatrix rand(T, M, K)
        Bstatic = @SMatrix rand(T, K, N)
        maybe_sleep()
        # Operands are passed through `Ref` and dereferenced inside the benchmark; this
        # is the usual BenchmarkTools idiom to keep the product from being folded away.
        t = @belapsed $(Ref(Astatic))[] * $(Ref(Bstatic))[]
        fill_bench_results!(bench_results, last_perfs, (M, K, N), t, i, 1)

        # 2. Mutable static matrices with `mul!`.
        Amutable = MArray(Astatic)
        Bmutable = MArray(Bstatic)
        Cmutable = MMatrix{M,N,T}(undef)
        maybe_sleep()
        t = @belapsed mul!($Cmutable, $Amutable, $Bmutable)
        fill_bench_results!(bench_results, last_perfs, (M, K, N), t, i, 2)

        # 3. Mutable static matrices with `matmul!`. Keep the `mul!` result as the
        # reference and poison `Cmutable` with NaN, so the final check only passes if
        # `matmul!` overwrote every entry.
        Cmutable0 = copy(Cmutable)
        Cmutable .= NaN
        maybe_sleep()
        t = @belapsed matmul!($Cmutable, $Amutable, $Bmutable)
        fill_bench_results!(bench_results, last_perfs, (M, K, N), t, i, 3)

        # 4. Plain arrays with `matmul!`.
        A = Array(Amutable)
        B = Array(Bmutable)
        C = Array(Cmutable)
        maybe_sleep()
        t = @belapsed matmul!($C, $A, $B)
        fill_bench_results!(bench_results, last_perfs, (M, K, N), t, i, 4)

        @assert Array(Cmutable) ≈ Array(Cmutable0) ≈ C
        ProgressMeter.next!(p; showvalues = last_perfs)
    end
    return bench_results
end
| 56 | + |
# Problem sizes to benchmark; each integer `n` is expanded to `(n, n, n)` by
# `matrix_sizes` inside `runbenches`.
sizerange = 2:48
br = runbenches(sizerange);
using DataFrames, VegaLite

# `br[:, :, 2]` holds the rates, one column per entry of `matmulmethodnames`.
# The column names are passed straight to the constructor: `DataFrame(matrix)` without
# names is rejected by DataFrames >= 1.0, and this makes a separate `rename!` unnecessary.
df = DataFrame(@view(br[:, :, 2]), matmulmethodnames);
df.Size = sizerange
| 64 | + |
"""
    pick_suffix(desc = "")

Build the suffix used in the benchmark plot's file name.

The result has the form `"<Sys.CPU_NAME>_<isa>"`, or `"<Sys.CPU_NAME>_<isa>_<desc>"`
when `desc` is non-empty. `<isa>` is chosen from `Octavian.VectorizationBase`:
`"AVX512"` if `AVX512F` is set, else `"AVX2"` if `AVX2` is set, else `"AVX"` if
`REGISTER_SIZE == 32` (presumably bytes, i.e. 256-bit registers; confirm against
VectorizationBase), else `"REGSIZE<REGISTER_SIZE>"`.
"""
function pick_suffix(desc = "")
    vb = Octavian.VectorizationBase
    suffix = if vb.AVX512F
        "AVX512"
    elseif vb.AVX2
        "AVX2"
    elseif vb.REGISTER_SIZE == 32
        "AVX"
    else
        # Fallback label; the original spelled this "REGSUZE".
        "REGSIZE$(vb.REGISTER_SIZE)"
    end
    if !isempty(desc)
        suffix *= '_' * desc
    end
    return "$(Sys.CPU_NAME)_$suffix"
end
| 80 | + |
# Long format: one row per (Size, MatMulType) pair with its GFLOPS value.
dfs = stack(df, matmulmethodnames; variable_name = :MatMulType, value_name = :GFLOPS);

# One line per multiplication variant, GFLOPS against problem size.
p = dfs |> @vlplot(
    :line,
    x = :Size,
    y = :GFLOPS,
    width = 900,
    height = 600,
    color = {:MatMulType}
);

# Write the plot into the package's documentation assets, tagged by CPU and ISA.
save(
    joinpath(pkgdir(Octavian), "docs/src/assets/sizedarraybenchmarks_$(pick_suffix()).svg"),
    p,
)
| 84 | + |
| 85 | + |
| 86 | + |
0 commit comments