From 987f8357a057299b5b942a66c3a78a56929acf22 Mon Sep 17 00:00:00 2001
From: anicusan
Date: Mon, 27 Jan 2025 13:02:41 +0000
Subject: [PATCH 1/2] Added unrolled implementation of recursivefill! which works on GPUs and avoids recomputing global indices for each setindex!

---
 src/array_partition.jl | 7 +++++++
 src/utils.jl           | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/src/array_partition.jl b/src/array_partition.jl
index 7bd8bb52..4ee1a9ea 100644
--- a/src/array_partition.jl
+++ b/src/array_partition.jl
@@ -209,6 +209,13 @@ function Base.copyto!(A::ArrayPartition, src::ArrayPartition)
     A
 end
 
+function recursivefill!(b::ArrayPartition, a::T2) where {T2 <: Union{Number, Bool}}
+    unrolled_foreach!(b.x) do x
+        fill!(x, a)
+    end
+end
+
+
 ## indexing
 
 # Interface for the linear indexing. This is just a view of the underlying nested structure
diff --git a/src/utils.jl b/src/utils.jl
index ae4f1d2f..58945065 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -1,3 +1,7 @@
+unrolled_foreach!(f, t::Tuple) = (f(t[1]); unrolled_foreach!(f, Base.tail(t)))
+unrolled_foreach!(f, ::Tuple{}) = nothing
+
+
 """
 ```julia
 recursivecopy(a::Union{AbstractArray{T, N}, AbstractVectorOfArray{T, N}})
@@ -127,6 +131,7 @@ function recursivefill!(bs::AbstractVectorOfArray{T, N},
     end
 end
 
+
 for type in [AbstractArray, AbstractVectorOfArray]
     @eval function recursivefill!(b::$type{T, N}, a::T2) where {T <: Enum, T2 <: Enum, N}
         fill!(b, a)

From 3a2b75fb87b5dc8fa8c90c254cf04d336a22bd4c Mon Sep 17 00:00:00 2001
From: anicusan
Date: Mon, 27 Jan 2025 13:17:47 +0000
Subject: [PATCH 2/2] Added GPU tests for ArrayPartition

---
 test/gpu/arraypartition_gpu.jl | 16 ++++++++++++++++
 test/runtests.jl               |  1 +
 2 files changed, 17 insertions(+)
 create mode 100644 test/gpu/arraypartition_gpu.jl

diff --git a/test/gpu/arraypartition_gpu.jl b/test/gpu/arraypartition_gpu.jl
new file mode 100644
index 00000000..c9a87dc8
--- /dev/null
+++ b/test/gpu/arraypartition_gpu.jl
@@ -0,0 +1,16 @@
+using RecursiveArrayTools, CUDA, Test
+CUDA.allowscalar(false)
+
+
+# Test indexing with colon
+a = (CUDA.zeros(5), CUDA.zeros(5))
+pA = ArrayPartition(a)
+pA[:, :]
+
+# Indexing with boolean masks does not work yet
+mask = pA .> 0
+# pA[mask]
+
+# Test recursive filling is done using GPU kernels and not scalar indexing
+RecursiveArrayTools.recursivefill!(pA, true)
+@test all(pA .== true)
diff --git a/test/runtests.jl b/test/runtests.jl
index 819e40f3..4ec9d6f4 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -54,5 +54,6 @@ end
     if GROUP == "GPU"
         activate_gpu_env()
         @time @safetestset "VectorOfArray GPU" include("gpu/vectorofarray_gpu.jl")
+        @time @safetestset "ArrayPartition GPU" include("gpu/arraypartition_gpu.jl")
     end
 end