Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/array/stencil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,8 @@ function load_boundary_region(::Reflect{Symm}, arr, region_code::NTuple{N,Int},
# Reverse only along dimensions that are actually being reflected
# (both non-zero in region_code AND past boundary)
for i in 1:N
if region_code[i] != 0 && boundary_dims[i]
# FIXME: allowscalar because some GPU backends don't overload reverse
GPUArraysCore.@allowscalar if region_code[i] != 0 && boundary_dims[i]
region = reverse(region, dims=i)
end
end
Expand Down
36 changes: 36 additions & 0 deletions src/utils/haloarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,42 @@ end
end
end

Base.IndexStyle(::Type{<:HaloArray}) = IndexCartesian()

# GPU-friendly reductions for SubArray views of HaloArray.
# The standard reduction path uses _foldl_impl/iterate which produces Union
# types that SPIR-V and other GPU compilers can't handle, and passes through
# keyword-argument forwarding that triggers dynamic dispatch on GPU.
# These overrides use CartesianIndices iteration which compiles cleanly.
@inline function Base.mapreduce(f::F, op::OP, A::SubArray{T,N,<:HaloArray};
dims=:, init=Base._InitialValue()) where {F,OP,T,N}
first_idx = CartesianIndex(ntuple(d -> firstindex(A, d), Val(N)))
result = f(@inbounds A[first_idx])
@inbounds for idx in CartesianIndices(A)
idx == first_idx && continue
result = op(result, f(A[idx]))
end
return result
end
@inline function Base.sum(A::SubArray{T,N,<:HaloArray}) where {T,N}
first_idx = CartesianIndex(ntuple(d -> firstindex(A, d), Val(N)))
result = @inbounds A[first_idx]
@inbounds for idx in CartesianIndices(A)
idx == first_idx && continue
result += A[idx]
end
return result
end
@inline function Base.sum(f::F, A::SubArray{T,N,<:HaloArray}) where {F,T,N}
first_idx = CartesianIndex(ntuple(d -> firstindex(A, d), Val(N)))
result = f(@inbounds A[first_idx])
@inbounds for idx in CartesianIndices(A)
idx == first_idx && continue
result += f(A[idx])
end
return result
end

Adapt.adapt_structure(to, H::Dagger.HaloArray) =
HaloArray(Adapt.adapt(to, H.center),
Adapt.adapt.(Ref(to), H.halos),
Expand Down
12 changes: 6 additions & 6 deletions test/array/stencil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,14 @@ function test_stencil()
# slope at high boundary = 8.0 - 6.0 = 2.0
# idx=0 → 2.0 + 2.0*(-1) = 0.0
# idx=5 → 8.0 + 2.0*(1) = 10.0
A = DArray([2.0, 4.0, 6.0, 8.0], Blocks(2))
B = zeros(Blocks(2), Float64, 4)
A = DArray([2f0, 4f0, 6f0, 8f0], Blocks(2))
B = zeros(Blocks(2), Float32, 4)
@stencil B[idx] = sum(@neighbors(A[idx], 1, LinearExtrapolate()))
# B[1]: neighbors at indices 0, 1, 2 -> extrapolated 0 becomes 0.0, so [0.0, 2.0, 4.0] = 6.0
# B[2]: neighbors at indices 1, 2, 3 -> [2.0, 4.0, 6.0] = 12.0
# B[3]: neighbors at indices 2, 3, 4 -> [4.0, 6.0, 8.0] = 18.0
# B[4]: neighbors at indices 3, 4, 5 -> extrapolated 5 becomes 10.0, so [6.0, 8.0, 10.0] = 24.0
expected_B_extrap = [6.0, 12.0, 18.0, 24.0]
expected_B_extrap = [6f0, 12f0, 18f0, 24f0]
@test collect(B) ≈ expected_B_extrap
end

Expand Down Expand Up @@ -348,9 +348,9 @@ function test_stencil()
@stencil B[idx] *= 3
@test all(collect(B) .== 3)

C = DArray(fill(10.0, 4, 4), Blocks(2, 2))
@stencil C[idx] /= 2.0
@test all(collect(C) .== 5.0)
C = DArray(fill(10f0, 4, 4), Blocks(2, 2))
@stencil C[idx] /= 2f0
@test all(collect(C) .== 5f0)

D = DArray(fill(10, 4, 4), Blocks(2, 2))
@stencil D[idx] -= 1
Expand Down