Skip to content

Commit ead8fbb

Browse files
authored
Polyester 0.4 (#112)
* Hacky Polyester 0.4 support (supports only up to 64 threads)
* Update to VectorizationBase 0.21
1 parent 8907884 commit ead8fbb

File tree

5 files changed

17 additions and 12 deletions (+17, -12 lines changed)

5 files changed

17 additions and 12 deletions (+17, -12 lines changed)

Project.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Octavian"
22
uuid = "6fd5a793-0b7e-452c-907f-f8bfe9c57db4"
33
authors = ["Mason Protter", "Chris Elrod", "Dilum Aluthge", "contributors"]
4-
version = "0.3.1"
4+
version = "0.3.2"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -18,10 +18,10 @@ ArrayInterface = "3.1.14"
1818
IfElse = "0.1"
1919
LoopVectorization = "0.12.34"
2020
ManualMemory = "0.1.1"
21-
Polyester = "0.3.5"
21+
Polyester = "0.4"
2222
Static = "0.2, 0.3"
2323
ThreadingUtilities = "0.4.6"
24-
VectorizationBase = "0.20.16"
24+
VectorizationBase = "0.21.5"
2525
julia = "1.6"
2626

2727
[extras]

src/Octavian.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ using VectorizationBase: align, AbstractStridedPointer, zstridedpointer, vsub_ns
66
static_sizeof, StridedPointer, gesp, pause, pick_vector_width, has_feature,
77
cache_size, num_cores, num_cores, cache_inclusive, cache_linesize
88
using LoopVectorization: preserve_buffer, CloseOpen, UpperBoundedInteger
9-
using ArrayInterface: size, strides, offsets, indices, axes
9+
using ArrayInterface: size, strides, offsets, indices, axes, StrideIndex
1010
using IfElse: ifelse
1111
using Polyester
1212
using Static: StaticInt, Zero, One, StaticBool, True, False, gt, eq, StaticFloat64,

src/matmul.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,9 @@ function __matmul!(
360360
clamp(div_fast(M * N, StaticInt{256}() * W), 0, _nthread-1)
361361
end
362362
# nkern = cld_fast(M * N, MᵣW * Nᵣ)
363-
threads, torelease = Polyester.request_threads(Threads.threadid()%UInt32, _nrequest)
363+
threads, torelease = Polyester.__request_threads(_nrequest % UInt32, Polyester.worker_pointer())
364+
# _threads, _torelease = Polyester.request_threads(Threads.threadid()%UInt32, _nrequest)
365+
364366
nrequest = threads.i
365367
iszero(nrequest) && @goto SINGLETHREAD
366368
nspawn = nrequest + 1

src/utils.jl

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ function default_stridedpointer_quote(::Type{T}, N, Ot) where {T}
3838
quote
3939
$(Expr(:meta,:inline))
4040
st = $st
41-
StridedPointer{$T,$N,$C,$B,$R}(ptr, $xt, $o)
41+
si = StrideIndex{$N,$R,$C}($xt, $o)
42+
stridedpointer(ptr, si, StaticInt{$B}())
4243
end
4344
end
4445

@@ -66,9 +67,10 @@ end
6667
Bn = Core.ifelse(B > 1, B+1, B)
6768
quote
6869
$(Expr(:meta,:inline))
69-
x = $gf(sp, :strd)
70+
x = strides(sp)
7071
x0 = $gf(x, 1, false)
71-
StridedPointer{$T,$(N+1),$Cn,$Bn,$Rn}($gf(sp,:p), $xt, $ot)
72+
si = StrideIndex{$(N+1),$Rn,$Cn}($xt, $ot)
73+
stridedpointer($gf(sp,:p), si, StaticInt{$Bn}())
7274
end
7375
end
7476
@generated function droplastdim(sp::StridedPointer{T,N,C,B,R}) where {T,N,C,B,R}
@@ -85,9 +87,10 @@ end
8587
end
8688
quote
8789
$(Expr(:meta,:inline))
88-
x = $gf(sp, :strd)
89-
o = $gf(sp, :offsets)
90-
StridedPointer{$T,$(N-1),$Cn,$Bn,$rt}($gf(sp,:p), $xt, $ot)
90+
x = strides(sp)
91+
o = offsets(sp)
92+
si = StrideIndex{$(N-1),$rt,$Cn}($xt, $ot)
93+
stridedpointer($gf(sp,:p), si, StaticInt{$Bn}())
9194
end
9295
end
9396

test/matmul_coverage.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ function matmul_pack_ab!(C, A, B)
88
nspawn = min(Threads.nthreads(), Octavian.num_cores())
99
GC.@preserve C A B begin
1010
if nspawn > 1
11-
threads, torelease = Octavian.Polyester.request_threads(Threads.threadid(), nspawn-1)
11+
threads, torelease = Octavian.Polyester.__request_threads((nspawn-1)%UInt32, Octavian.Polyester.worker_pointer())
1212
@assert threads.i < Threads.nthreads()
1313
Octavian.matmul_pack_A_and_B!(
1414
zc, za, zb, Octavian.StaticInt{1}(), Octavian.StaticInt{0}(), M, K, N, threads,

0 commit comments

Comments
 (0)