From cd83dfe19887f732a7c5971dd141b409459386d0 Mon Sep 17 00:00:00 2001 From: Tiem van der Deure <tiemvanderdeure@gmail.com> Date: Sun, 9 Mar 2025 02:29:50 +0300 Subject: [PATCH 1/4] include DataType in CategoricalEltypes (#876) --- src/Lookups/lookup_arrays.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Lookups/lookup_arrays.jl b/src/Lookups/lookup_arrays.jl index 3be595a0c..c72a600be 100644 --- a/src/Lookups/lookup_arrays.jl +++ b/src/Lookups/lookup_arrays.jl @@ -468,7 +468,7 @@ abstract type AbstractCategorical{T,O} <: Aligned{T,O} end order(lookup::AbstractCategorical) = lookup.order metadata(lookup::AbstractCategorical) = lookup.metadata -const CategoricalEltypes = Union{AbstractChar,Symbol,AbstractString} +const CategoricalEltypes = Union{AbstractChar,Symbol,AbstractString,DataType} function Adapt.adapt_structure(to, l::AbstractCategorical) rebuild(l; data=Adapt.adapt(to, parent(l)), metadata=NoMetadata()) From 685efd7f8ad3678cb0e30b9c1926216b8ad77d4d Mon Sep 17 00:00:00 2001 From: Rafael Schouten <rafaelschouten@gmail.com> Date: Sun, 9 Mar 2025 00:30:50 +0100 Subject: [PATCH 2/4] Breaking: `DimVector` of `NamedTuple` is a `NamedTuple` `DimTable` (#839) * DimVector of NamedTuple is a NamedTuple table * bugfix * remove show * fix ambiguity --- src/tables.jl | 137 ++++++++++++++++++++++++++++++------------------- src/utils.jl | 8 ++- test/tables.jl | 15 +++++- 3 files changed, 105 insertions(+), 55 deletions(-) diff --git a/src/tables.jl b/src/tables.jl index 9773a8495..d158b2e1c 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -5,6 +5,9 @@ Abstract supertype for dim tables """ abstract type AbstractDimTable <: Tables.AbstractColumns end +struct Columns end +struct Rows end + # Tables.jl interface for AbstractDimStack and AbstractDimArray DimTableSources = Union{AbstractDimStack,AbstractDimArray} @@ -12,12 +15,8 @@ DimTableSources = Union{AbstractDimStack,AbstractDimArray} Tables.istable(::Type{<:DimTableSources}) = true Tables.columnaccess(::Type{<:DimTableSources}) = true Tables.columns(x::DimTableSources) = DimTable(x) - -Tables.columnnames(A::AbstractDimArray) = _colnames(DimStack(A)) -Tables.columnnames(s::AbstractDimStack) = _colnames(s) - -Tables.schema(A::AbstractDimArray) = Tables.schema(DimStack(A)) -Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) +Tables.columnnames(x::DimTableSources) = _colnames(x) +Tables.schema(x::DimTableSources) = Tables.schema(DimTable(x)) @inline Tables.getcolumn(x::DimTableSources, i::Int) = Tables.getcolumn(DimTable(x), i) @inline Tables.getcolumn(x::DimTableSources, key::Symbol) = @@ -27,11 +26,14 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) @inline Tables.getcolumn(t::DimTableSources, dim::DimOrDimType) = Tables.getcolumn(t, dimnum(t, dim)) -function _colnames(s::AbstractDimStack) - dimkeys = map(name, dims(s)) - # The data is always the last column/s - (dimkeys..., keys(s)...) +_colnames(s::AbstractDimStack) = (map(name, dims(s))..., keys(s)...) +function _colnames(A::AbstractDimArray) + n = Symbol(name(A)) == Symbol("") ? :value : Symbol(name(A)) + (map(name, dims(A))..., n) end +_colnames(A::AbstractDimVector{T}) where T<:NamedTuple = + (map(name, dims(A))..., _colnames(T)...) +_colnames(::Type{<:NamedTuple{Keys}}) where Keys = Keys # DimTable @@ -87,18 +89,20 @@ julia> a = DimArray(ones(16, 16, 3), (X, Y, Dim{:band})) 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 -julia> +julia> ``` """ -struct DimTable <: AbstractDimTable +struct DimTable{Mode} <: AbstractDimTable parent::Union{AbstractDimArray,AbstractDimStack} colnames::Vector{Symbol} dimcolumns::Vector{AbstractVector} - dimarraycolumns::Vector{AbstractVector} + dimarraycolumns::Vector end -function DimTable(s::AbstractDimStack; mergedims=nothing) +function DimTable(s::AbstractDimStack; + mergedims=nothing, +) s = isnothing(mergedims) ? s : DD.mergedims(s, mergedims) dimcolumns = collect(_dimcolumns(s)) dimarraycolumns = if hassamedims(s) @@ -107,40 +111,52 @@ function DimTable(s::AbstractDimStack; mergedims=nothing) map(A -> vec(DimExtensionArray(A, dims(s))), layers(s)) end |> collect keys = collect(_colnames(s)) - return DimTable(s, keys, dimcolumns, dimarraycolumns) + return DimTable{Columns}(s, keys, dimcolumns, dimarraycolumns) end -function DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing) +function DimTable(As::Vararg{AbstractDimArray}; + layernames=nothing, + mergedims=nothing, +) # Check that dims are compatible - comparedims(xs...) - + comparedims(As...) # Construct Layer Names - layernames = isnothing(layernames) ? [Symbol("layer_$i") for i in eachindex(xs)] : layernames - + layernames = isnothing(layernames) ? uniquekeys(As) : layernames # Construct dimension and array columns with DimExtensionArray - xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs) - dims_ = dims(first(xs)) + As = isnothing(mergedims) ? As : map(x -> DD.mergedims(x, mergedims), As) + dims_ = dims(first(As)) dimcolumns = collect(_dimcolumns(dims_)) dimnames = collect(map(name, dims_)) - dimarraycolumns = collect(map(vec ∘ parent, xs)) + dimarraycolumns = collect(map(vec ∘ parent, As)) colnames = vcat(dimnames, layernames) # Return DimTable - return DimTable(first(xs), colnames, dimcolumns, dimarraycolumns) + return DimTable{Columns}(first(As), colnames, dimcolumns, dimarraycolumns) end -function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) - if !isnothing(layersfrom) && any(hasdim(x, layersfrom)) - d = dims(x, layersfrom) - nlayers = size(x, d) - layers = [view(x, rebuild(d, i)) for i in 1:nlayers] +function DimTable(A::AbstractDimArray; + layersfrom=nothing, + mergedims=nothing, +) + if !isnothing(layersfrom) && any(hasdim(A, layersfrom)) + d = dims(A, layersfrom) + nlayers = size(A, d) + layers = [view(A, rebuild(d, i)) for i in 1:nlayers] layernames = if iscategorical(d) Symbol.((name(d),), '_', lookup(d)) else Symbol.(("$(name(d))_$i" for i in 1:nlayers)) end - return DimTable(layers..., layernames=layernames, mergedims=mergedims) + return DimTable(layers...; layernames, mergedims) else - s = name(x) == NoName() ? DimStack((;value=x)) : DimStack(x) - return DimTable(s, mergedims=mergedims) + A = isnothing(mergedims) ? A : DD.mergedims(A, mergedims) + dimcolumns = collect(_dimcolumns(A)) + colnames = collect(_colnames(A)) + if (ndims(A) == 1) && (eltype(A) <: NamedTuple) + dimarrayrows = parent(A) + return DimTable{Rows}(A, colnames, dimcolumns, dimarrayrows) + else + dimarraycolumns = [vec(parent(A))] + return DimTable{Columns}(A, colnames, dimcolumns, dimarraycolumns) + end end end @@ -155,8 +171,6 @@ function _dimcolumn(x, d::Dimension) end end - - dimcolumns(t::DimTable) = getfield(t, :dimcolumns) dimarraycolumns(t::DimTable) = getfield(t, :dimarraycolumns) colnames(t::DimTable) = Tuple(getfield(t, :colnames)) @@ -174,12 +188,26 @@ Tables.columnaccess(::Type{<:DimTable}) = true Tables.columns(t::DimTable) = t Tables.columnnames(c::DimTable) = colnames(c) -function Tables.schema(t::DimTable) - types = vcat([map(eltype, dimcolumns(t))...], [map(eltype, dimarraycolumns(t))...]) +function Tables.schema(t::DimTable) + types = vcat([map(eltype, dimcolumns(t))...], _dimarraycolumn_eltypes(t)) Tables.Schema(colnames(t), types) end -@inline function Tables.getcolumn(t::DimTable, i::Int) +_dimarraycolumn_eltypes(t::DimTable{Columns}) = [map(eltype, dimarraycolumns(t))...] +_dimarraycolumn_eltypes(t::DimTable{Rows}) = _eltypes(eltype(dimarraycolumns(t))) +_eltypes(::Type{T}) where T<:NamedTuple = collect(T.types) + +@inline function Tables.getcolumn(t::DimTable{Rows}, i::Int) + nkeys = length(colnames(t)) + if i > length(dims(t)) + map(nt -> nt[i], dimarraycolumns(t)) + elseif i > 0 && i < nkeys + dimcolumns(t)[i] + else + throw(ArgumentError("There is no table column $i")) + end +end +@inline function Tables.getcolumn(t::DimTable{Columns}, i::Int) nkeys = length(colnames(t)) if i > length(dims(t)) dimarraycolumns(t)[i - length(dims(t))] @@ -189,12 +217,19 @@ end throw(ArgumentError("There is no table column $i")) end end - -@inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType) +@inline function Tables.getcolumn(t::DimTable, dim::Union{Dimension,Type{<:Dimension}}) dimcolumns(t)[dimnum(t, dim)] end - -@inline function Tables.getcolumn(t::DimTable, key::Symbol) +@inline function Tables.getcolumn(t::DimTable{Rows}, key::Symbol) + key in colnames(t) || throw(ArgumentError("There is no table column $key")) + if hasdim(parent(t), key) + dimcolumns(t)[dimnum(t, key)] + else + # Function barrier + _col_from_rows(dimarraycolumns(t), key) + end +end +@inline function Tables.getcolumn(t::DimTable{Columns}, key::Symbol) keys = colnames(t) i = findfirst(==(key), keys) if isnothing(i) @@ -203,22 +238,20 @@ end return Tables.getcolumn(t, i) end end - @inline function Tables.getcolumn(t::DimTable, ::Type{T}, i::Int, key::Symbol) where T Tables.getcolumn(t, key) end -# TableTraits.jl interface - +_col_from_rows(rows, key) = map(row -> row[key], rows) -function IteratorInterfaceExtensions.getiterator(x::DimTableSources) - return Tables.datavaluerows(Tables.dictcolumntable(x)) -end -IteratorInterfaceExtensions.isiterable(::DimTableSources) = true +# TableTraits.jl interface TableTraits.isiterabletable(::DimTableSources) = true +TableTraits.isiterabletable(::DimTable) = true -function IteratorInterfaceExtensions.getiterator(t::DimTable) - return Tables.datavaluerows(Tables.dictcolumntable(t)) -end +# IteratorInterfaceExtensions.jl interface +IteratorInterfaceExtensions.getiterator(x::DimTableSources) = + Tables.datavaluerows(Tables.dictcolumntable(x)) +IteratorInterfaceExtensions.getiterator(t::DimTable) = + Tables.datavaluerows(Tables.dictcolumntable(t)) +IteratorInterfaceExtensions.isiterable(::DimTableSources) = true IteratorInterfaceExtensions.isiterable(::DimTable) = true -TableTraits.isiterabletable(::DimTable) = true diff --git a/src/utils.jl b/src/utils.jl index 8c9f5cb43..1a5ad439e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -193,9 +193,13 @@ function uniquekeys(keys::Vector{Symbol}) end end function uniquekeys(keys::Tuple{Symbol,Vararg{Symbol}}) - ids = ntuple(x -> x, length(keys)) + ids = ntuple(identity, length(keys)) map(keys, ids) do k, id - count(k1 -> k == k1, keys) > 1 ? Symbol(:layer, id) : k + if k == Symbol("") + Symbol(:layer, id) + else + count(k1 -> k == k1, keys) > 1 ? Symbol(:layer, id) : k + end end end uniquekeys(t::Tuple) = ntuple(i -> Symbol(:layer, i), length(t)) diff --git a/test/tables.jl b/test/tables.jl index b5bd416ea..f5ea708db 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -1,4 +1,9 @@ -using DimensionalData, IteratorInterfaceExtensions, TableTraits, Tables, Test, DataFrames +using DimensionalData +using Test +using Tables +using IteratorInterfaceExtensions +using TableTraits +using DataFrames using DimensionalData.Lookups, DimensionalData.Dimensions using DimensionalData: DimTable, DimExtensionArray @@ -154,3 +159,11 @@ end @test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3) @test Tables.columnnames(t4) == (:band, :geometry, :value) end + +@testset "DimTable NamedTuple" begin + da = DimArray([(; a=1.0f0i, b=2.0i) for i in 1:10], X) + t = DimTable(da) + s = Tables.schema(t) + @test s.names == (:X, :a, :b) + @test s.types == (Int, Float32, Float64) +end From 49746efdc38b0e1559dccd7af71ea4ddd4b1a57f Mon Sep 17 00:00:00 2001 From: Rafael Schouten <rafaelschouten@gmail.com> Date: Sun, 9 Mar 2025 00:33:53 +0100 Subject: [PATCH 3/4] Breaking: add `combine` method for `groupby` output, fixing `similar` for `AbstractDimStack` (#903) * add combine method * test groupby and similar * docs entry --- docs/src/api/reference.md | 1 + src/DimensionalData.jl | 2 +- src/array/methods.jl | 2 +- src/groupby.jl | 49 +++++++++++++++++++++++++++++++++++++-- src/stack/indexing.jl | 26 ++++++++++----------- src/stack/stack.jl | 31 ++++++++++++++++++++++++- src/utils.jl | 5 ++++ test/groupby.jl | 27 ++++++++++++++++++--- test/stack.jl | 16 +++++++++++-- 9 files changed, 135 insertions(+), 24 deletions(-) diff --git a/docs/src/api/reference.md b/docs/src/api/reference.md index 3f6667d82..7ddb1c83c 100644 --- a/docs/src/api/reference.md +++ b/docs/src/api/reference.md @@ -69,6 +69,7 @@ For transforming DimensionalData objects: ```@docs groupby +combine DimensionalData.DimGroupByArray Bins ranges diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 32247dba3..0d04bdf65 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -83,7 +83,7 @@ export dimnum, hasdim, hasselection, otherdims export set, rebuild, reorder, modify, broadcast_dims, broadcast_dims!, mergedims, unmergedims, maplayers -export groupby, seasons, months, hours, intervals, ranges +export groupby, combine, seasons, months, hours, intervals, ranges export @d diff --git a/src/array/methods.jl b/src/array/methods.jl index c4a63ad8d..27a1cdc35 100644 --- a/src/array/methods.jl +++ b/src/array/methods.jl @@ -421,7 +421,7 @@ function _check_cat_lookups(D, ::Regular, lookups...) @warn _cat_warn_string(D, "step sizes $(step(span(l))) and $s do not match") return false end - if !(lastval + s ≈ first(l)) + if !(s isa Dates.AbstractTime) && !(lastval + s ≈ first(l)) @warn _cat_warn_string(D, "`Regular` lookups do not join with the correct step size: $(lastval) + $s ≈ $(first(l)) should hold") return false end diff --git a/src/groupby.jl b/src/groupby.jl index 6ee94a2b4..bfcb63cdd 100644 --- a/src/groupby.jl +++ b/src/groupby.jl @@ -249,7 +249,6 @@ Group some data along the time dimension: ```jldoctest groupby; setup = :(using Random; Random.seed!(123)) julia> using DimensionalData, Dates - julia> A = rand(X(1:0.1:20), Y(1:20), Ti(DateTime(2000):Day(3):DateTime(2003))); julia> groups = groupby(A, Ti => month) # Group by month @@ -356,6 +355,7 @@ end function _group_indices(dim::Dimension, f::Base.Callable; labels=nothing) orig_lookup = lookup(dim) k1 = f(first(orig_lookup)) + # TODO: using a Dict here is a bit slow indices_dict = Dict{typeof(k1),Vector{Int}}() for (i, x) in enumerate(orig_lookup) k = f(x) @@ -447,7 +447,8 @@ end Generate a `Vector` of `UnitRange` with length `step(A)` """ -intervals(rng::AbstractRange) = IntervalSets.Interval{:closed,:open}.(rng, rng .+ step(rng)) +intervals(rng::AbstractRange) = + IntervalSets.Interval{:closed,:open}.(rng, rng .+ step(rng)) """ ranges(A::AbstractRange{<:Integer}) @@ -455,3 +456,47 @@ intervals(rng::AbstractRange) = IntervalSets.Interval{:closed,:open}.(rng, rng . Generate a `Vector` of `UnitRange` with length `step(A)` """ ranges(rng::AbstractRange{<:Integer}) = map(x -> x:x+step(rng)-1, rng) + +""" + combine(f::Function, gb::DimGroupByArray; dims=:) + +Combine the `DimGroupByArray` using function `f` over the group dimensions. +Unlike broadcasting a reducing function over a `DimGroupByArray`, this function +always returns a new flattened `AbstractDimArray` even where not all dimensions +are reduced. It will also work over grouped `AbstractDimStack`. + +If `dims` is given, it will combine only the dimensions in `dims`, the +others will be present in the final array. Note that all grouped dimensions +must be reduced and included in `dims`. + +The reducing function `f` must also accept a `dims` keyword. + +# Example + +```jldoctest groupby +```` +""" +function combine(f::Function, gb::DimGroupByArray{G}; dims=:) where G + targetdims = DD.commondims(first(gb), dims) + all(hasdim(first(gb), targetdims)) || throw(ArgumentError("dims must be a subset of the groupby dimensions")) + all(hasdim(targetdims, DD.dims(gb))) || throw(ArgumentError("grouped dimensions $(DD.basedims(gb)) must be included in dims")) + # This works for both arrays and stacks + # Combine the remaining dimensions after reduction and the group dimensions + destdims = (otherdims(DD.dims(first(gb)), dims)..., DD.dims(gb)...) + # Get the output eltype + T = Base.promote_op(f, G) + # Create a output array with the combined dimensions + dest = similar(first(gb), T, destdims) + for D in DimIndices(gb) + if all(hasdim(targetdims, DD.dims(first(gb)))) + # Assigned reduced scalar to dest + dest[D...] = f(gb[D]) + else + # Reduce with `f` and drop length 1 dimensions + xs = dropdims(f(gb[D]; dims); dims) + # Broadcast the reduced array to dest + broadcast_dims!(identity, view(dest, D...), xs) + end + end + return dest +end \ No newline at end of file diff --git a/src/stack/indexing.jl b/src/stack/indexing.jl index dad1ebaaf..8843c99ad 100644 --- a/src/stack/indexing.jl +++ b/src/stack/indexing.jl @@ -150,6 +150,9 @@ for f in (:getindex, :view, :dotview) end end +@generated function _any_dimarray(v::Union{NamedTuple,Tuple}) + any(T -> T <: AbstractDimArray, v.types) +end #### setindex #### @propagate_inbounds Base.setindex!(s::AbstractDimStack, xs, I...; kw...) = @@ -160,22 +163,17 @@ end hassamedims(s) ? _map_setindex!(s, xs, i; kw...) : _setindex_mixed!(s, xs, i; kw...) @propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, i::AbstractArray; kw...) = hassamedims(s) ? _map_setindex!(s, xs, i; kw...) : _setindex_mixed!(s, xs, i; kw...) +@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, i::DimensionIndsArrays; kw...) = + _map_setindex!(s, xs, i; kw...) +@propagate_inbounds Base.setindex!(s::AbstractDimStack, xs::NamedTuple, I...; kw...) = + _map_setindex!(s, xs, I...; kw...) -@propagate_inbounds function Base.setindex!( - s::AbstractDimStack, xs::NamedTuple, I...; kw... -) - map((A, x) -> setindex!(A, x, I...; kw...), layers(s), xs) -end - -_map_setindex!(s, xs, i; kw...) = map((A, x) -> setindex!(A, x, i...; kw...), layers(s), xs) +_map_setindex!(s, xs, i...; kw...) = map((A, x) -> setindex!(A, x, i...; kw...), layers(s), xs) -_setindex_mixed!(s::AbstractDimStack, x, i::AbstractArray) = - map(A -> setindex!(A, x, DimIndices(dims(s))[i]), layers(s)) -_setindex_mixed!(s::AbstractDimStack, i::Integer) = - map(A -> setindex!(A, x, DimIndices(dims(s))[i]), layers(s)) -function _setindex_mixed!(s::AbstractDimStack, x, i::Colon) - map(DimIndices(dims(s))) do D - map(A -> setindex!(A, D), x, layers(s)) +function _setindex_mixed!(s::AbstractDimStack, xs::NamedTuple, i) + D = DimIndices(dims(s))[i] + map(layers(s), xs) do A, x + A[D] = x end end diff --git a/src/stack/stack.jl b/src/stack/stack.jl index 92bb07963..0e212f1c8 100644 --- a/src/stack/stack.jl +++ b/src/stack/stack.jl @@ -153,7 +153,6 @@ Base.length(s::AbstractDimStack) = prod(size(s)) Base.axes(s::AbstractDimStack) = map(first ∘ axes, dims(s)) Base.axes(s::AbstractDimStack, dims::DimOrDimType) = axes(s, dimnum(s, dims)) Base.axes(s::AbstractDimStack, dims::Integer) = axes(s)[dims] -Base.similar(s::AbstractDimStack, args...) = maplayers(A -> similar(A, args...), s) Base.eltype(::AbstractDimStack{<:Any,T}) where T = T Base.ndims(::AbstractDimStack{<:Any,<:Any,N}) where N = N Base.CartesianIndices(s::AbstractDimStack) = CartesianIndices(dims(s)) @@ -197,6 +196,36 @@ Base.get(f::Base.Callable, st::AbstractDimStack, k::Symbol) = @propagate_inbounds Base.iterate(st::AbstractDimStack, i) = i > length(st) ? nothing : (st[DimIndices(st)[i]], i + 1) +Base.similar(s::AbstractDimStack) = similar(s, eltype(s)) +Base.similar(s::AbstractDimStack, dims::Dimension...) = similar(s, dims) +Base.similar(s::AbstractDimStack, ::Type{T},dims::Dimension...) where T = + similar(s, T, dims) +Base.similar(s::AbstractDimStack, dims::Tuple{Vararg{Dimension}}) = + similar(s, eltype(s), dims) +Base.similar(s::AbstractDimStack, ::Type{T}) where T = + similar(s, T, dims(s)) +function Base.similar(s::AbstractDimStack, ::Type{T}, dims::Tuple) where T + # Any dims not in the stack are added to all layers + ods = otherdims(dims, DD.dims(s)) + maplayers(s) do A + # Original layer dims are maintained, other dims are added + D = DD.commondims(dims, (DD.dims(A)..., ods...)) + similar(A, T, D) + end +end +function Base.similar(s::AbstractDimStack, ::Type{T}, dims::Tuple) where T<:NamedTuple + ods = otherdims(dims, DD.dims(s)) + maplayers(s, _nt_types(T)) do A, Tx + D = DD.commondims(dims, (DD.dims(A)..., ods...)) + similar(A, Tx, D) + end +end + +@generated function _nt_types(::Type{NamedTuple{K,T}}) where {K,T} + expr = Expr(:tuple, T.parameters...) + return :(NamedTuple{K}($expr)) +end + # `merge` for AbstractDimStack and NamedTuple. # One of the first three arguments must be an AbstractDimStack for dispatch to work. Base.merge(s::AbstractDimStack) = s diff --git a/src/utils.jl b/src/utils.jl index 1a5ad439e..0b8092f12 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -160,6 +160,11 @@ function broadcast_dims!(f, dest::AbstractDimArray{<:Any,N}, As::AbstractBasicDi od = map(A -> otherdims(dest, dims(A)), As) return _broadcast_dims_inner!(f, dest, As, od) end +function broadcast_dims!(f, dest::AbstractDimStack, stacks::AbstractDimStack...) + maplayers(dest, stacks...) do d, layers... + broadcast_dims!(f, d, layers...) + end +end # Function barrier function _broadcast_dims_inner!(f, dest, As, od) diff --git a/test/groupby.jl b/test/groupby.jl index 3f6e1f7ce..cd19274bd 100644 --- a/test/groupby.jl +++ b/test/groupby.jl @@ -8,7 +8,6 @@ days = DateTime(2000):Day(1):DateTime(2000, 12, 31) A = DimArray((1:6) * (1:366)', (X(1:0.2:2), Ti(days))) st = DimStack((a=A, b=A, c=A[X=1])) - @testset "group eltype matches indexed values" begin da = rand(X(1:10), Y(1:10)) grps = groupby(da, X => isodd) @@ -22,10 +21,16 @@ end mean(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1]) end @test mean.(groupby(A, Ti=>month)) == manualmeans + combinedmeans = combine(mean, groupby(A, Ti=>month)) + @test combinedmeans isa DimArray + @test combinedmeans == manualmeans manualmeans_st = map(months) do m mean(st[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1]) end @test mean.(groupby(st, Ti=>month)) == manualmeans_st + combinedmeans_st = combine(mean, groupby(st, Ti=>month)) + @test combinedmeans_st isa DimStack{(:a, :b, :c), @NamedTuple{a::Float64, b::Float64, c::Float64}} + @test collect(combinedmeans_st) == manualmeans_st manualsums = mapreduce(hcat, months) do m vcat(sum(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1, X=1 .. 1.5]), @@ -36,6 +41,8 @@ end @test dims(gb_sum, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())) @test typeof(dims(gb_sum, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))) @test gb_sum == manualsums + combined_sum = combine(sum, groupby(A, Ti=>month, X => >(1.5))) + @test collect(combined_sum) == manualsums manualsums_st = mapreduce(hcat, months) do m vcat(sum(st[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1, X=1 .. 1.5]), @@ -46,10 +53,22 @@ end @test dims(gb_sum_st, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())) @test typeof(dims(gb_sum_st, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))) @test gb_sum_st == manualsums_st + combined_sum_st = combine(sum, groupby(st, Ti=>month, X => >(1.5))) + @test collect(combined_sum_st) == manualsums_st @test_throws ArgumentError groupby(st, Ti=>month, Y=>isodd) end +@testset "partial reductions in combine" begin + months = DateTime(2000):Month(1):DateTime(2000, 12, 31) + using BenchmarkTools + manualmeans = cat(map(months) do m + mean(A[Ti=dayofyear(m):dayofyear(m)+daysinmonth(m)-1]; dims=Ti) + end...; dims=Ti(collect(1:12))) + combinedmeans = combine(mean, groupby(A, Ti()=>month); dims=Ti()) + @test combinedmeans == manualmeans +end + @testset "bins" begin seasons = DateTime(2000):Month(3):DateTime(2000, 12, 31) manualmeans = map(seasons) do s @@ -59,6 +78,7 @@ end @test mean.(groupby(A, Ti=>Bins(month, ranges(1:3:12)))) == manualmeans @test mean.(groupby(A, Ti=>Bins(month, intervals(1:3:12)))) == manualmeans @test mean.(groupby(A, Ti=>Bins(month, 4))) == manualmeans + @test combine(mean, groupby(A, Ti=>Bins(month, ranges(1:3:12)))) == manualmeans end @testset "dimension matching groupby" begin @@ -75,9 +95,10 @@ end end @test all(collect(mean.(gb)) .=== manualmeans) @test all(mean.(gb) .=== manualmeans) + @test all(combine(mean, gb) .=== manualmeans) end -@testset "broadcastdims runs after groupby" begin +@testset "broadcast_dims runs after groupby" begin dimlist = ( Ti(Date("2021-12-01"):Day(1):Date("2022-12-31")), X(range(1, 10, length=10)), @@ -87,7 +108,7 @@ end data = rand(396, 10, 15, 2) A = DimArray(data, dimlist) month_length = DimArray(daysinmonth, dims(A, Ti)) - g_tempo = DimensionalData.groupby(month_length, Ti=>seasons(; start=December)) + g_tempo = DimensionalData.groupby(month_length, Ti => seasons(; start=December)) sum_days = sum.(g_tempo, dims=Ti) @test sum_days isa DimArray weights = map(./, g_tempo, sum_days) diff --git a/test/stack.jl b/test/stack.jl index cfde24449..ee6d2eac0 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -3,7 +3,7 @@ using DimensionalData, Test, LinearAlgebra, Statistics, ConstructionBase, Random using DimensionalData: data using DimensionalData: Sampled, Categorical, AutoLookup, NoLookup, Transformed, Regular, Irregular, Points, Intervals, Start, Center, End, - Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, layers, basedims + Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, layers, basedims, layerdims A = [1.0 2.0 3.0; 4.0 5.0 6.0] @@ -94,11 +94,23 @@ end @test all(maplayers(similar(mixed), mixed) do s, m dims(s) == dims(m) && dims(s) === dims(m) && eltype(s) === eltype(m) end) - @test eltype(similar(s, Int)) === @NamedTuple{one::Int, two::Int, three::Int} + @test eltype(similar(s, Int)) === + @NamedTuple{one::Int, two::Int, three::Int} + @test eltype(similar(s, @NamedTuple{one::Int, two::Float32, three::Bool})) === + @NamedTuple{one::Int, two::Float32, three::Bool} st2 = similar(mixed, Bool, x, y) @test dims(st2) === (x, y) @test dims(st2[:one]) === (x, y) @test eltype(st2) === @NamedTuple{one::Bool, two::Bool, extradim::Bool} + @test eltype(similar(mixed)) == eltype(mixed) + @test size(similar(mixed)) == size(mixed) + @test keys(similar(mixed)) == keys(mixed) + @test layerdims(similar(mixed)) == layerdims(mixed) + xy = (X(), Y()) + @test layerdims(similar(mixed, dims(mixed, (X, Y)))) == (one=xy, two=xy, extradim=xy) + st3 = similar(mixed, @NamedTuple{one::Int, two::Float32, extradim::Bool}, (Z([:a, :b, :c]), Ti(1:12), X(1:3))) + @test layerdims(st3) == (one=(Ti(), X()), two=(Ti(), X()), extradim=(Z(), Ti(), X())) + @test eltype(st3) == @NamedTuple{one::Int, two::Float32, extradim::Bool} end @testset "merge" begin From 4b8bd51c9759d5e5b34d68ac8db5fa45f2cb2771 Mon Sep 17 00:00:00 2001 From: Rafael Schouten <rafaelschouten@gmail.com> Date: Wed, 26 Mar 2025 22:49:18 +0100 Subject: [PATCH 4/4] Breaking: `preservedims` in tables (#917) * add preservedims keyword to DimTable * add tests * Apply suggestions from code review Co-authored-by: Anshul Singhvi <anshulsinghvi@gmail.com> * tests, and fix DimSlices * better table docs * cleanup * test * indexing overhaul * fix similar and broadcast for basicdimarray * bugfix rebuildsliced * more indexing cleanup * cleanup similar and gubfix indexing * bugfixes * uncomment * fix doctests * just dont doctest unreproducable failures, for now * combine new Tables integrations * bugfix and cleanup show * bugfix and more tests for preservedims and mergedims --------- Co-authored-by: Anshul Singhvi <anshulsinghvi@gmail.com> --- src/DimensionalData.jl | 1 + src/Dimensions/dimension.jl | 1 + src/Lookups/Lookups.jl | 6 +- src/Lookups/beginend.jl | 2 + src/Lookups/lookup_arrays.jl | 11 +- src/Lookups/selector.jl | 9 + src/array/array.jl | 72 ++++-- src/array/broadcast.jl | 40 +++- src/array/indexing.jl | 189 +++++++-------- src/array/methods.jl | 5 +- src/dimindices.jl | 429 ++++++++++++++++++++--------------- src/groupby.jl | 140 ++++++++---- src/opaque.jl | 19 ++ src/stack/methods.jl | 4 +- src/tables.jl | 151 ++++++++---- test/dimindices.jl | 15 +- test/groupby.jl | 6 +- test/indexing.jl | 2 + test/runtests.jl | 1 - test/stack.jl | 2 +- test/tables.jl | 73 ++++-- 21 files changed, 742 insertions(+), 436 deletions(-) create mode 100644 src/opaque.jl diff --git a/src/DimensionalData.jl b/src/DimensionalData.jl index 0d04bdf65..009084c4e 100644 --- a/src/DimensionalData.jl +++ b/src/DimensionalData.jl @@ -116,6 +116,7 @@ include("tables.jl") include("plotrecipes.jl") include("utils.jl") include("set.jl") +include("opaque.jl") include("groupby.jl") include("precompile.jl") include("interface_tests.jl") diff --git a/src/Dimensions/dimension.jl b/src/Dimensions/dimension.jl index 38d07a631..6e554448c 100644 --- a/src/Dimensions/dimension.jl +++ b/src/Dimensions/dimension.jl @@ -277,6 +277,7 @@ Base.axes(d::Dimension, i) = axes(d)[i] Base.eachindex(d::Dimension) = eachindex(val(d)) Base.length(d::Dimension) = length(val(d)) Base.ndims(d::Dimension) = 0 +Base.parentindices(d::Dimension{<:AbstractArray}) = parentindices(parent(d)) Base.ndims(d::Dimension{<:AbstractArray}) = ndims(val(d)) Base.iterate(d::Dimension{<:AbstractArray}, args...) = iterate(lookup(d), args...) Base.first(d::Dimension) = val(d) diff --git a/src/Lookups/Lookups.jl b/src/Lookups/Lookups.jl index fb7220d87..a1dee6e06 100644 --- a/src/Lookups/Lookups.jl +++ b/src/Lookups/Lookups.jl @@ -59,17 +59,15 @@ export Unaligned, Transformed, ArrayLookup # Deprecated export LookupArray -const StandardIndices = Union{AbstractArray{<:Integer},Colon,Integer,CartesianIndex,CartesianIndices} - # As much as possible keyword rebuild is automatic rebuild(x; kw...) = ConstructionBase.setproperties(x, (; kw...)) -include("metadata.jl") include("lookup_traits.jl") +include("metadata.jl") include("lookup_arrays.jl") +include("beginend.jl") include("predicates.jl") include("selector.jl") -include("beginend.jl") include("indexing.jl") include("methods.jl") include("utils.jl") diff --git a/src/Lookups/beginend.jl b/src/Lookups/beginend.jl index 86fdf190b..a84e0ac70 100644 --- a/src/Lookups/beginend.jl +++ b/src/Lookups/beginend.jl @@ -45,6 +45,8 @@ Base.to_indices(A, inds, (r, args...)::Tuple{<:Union{Begin,End,<:LazyMath},Varar _to_index(inds, a::Int) = a _to_index(inds, ::Begin) = first(inds) _to_index(inds, ::End) = last(inds) +_to_index(inds, ::Type{Begin}) = first(inds) +_to_index(inds, ::Type{End}) = last(inds) _to_index(inds, l::LazyMath{End}) = l.f(last(inds)) _to_index(inds, l::LazyMath{Begin}) = l.f(first(inds)) diff --git a/src/Lookups/lookup_arrays.jl b/src/Lookups/lookup_arrays.jl index c72a600be..44d99c3eb 100644 --- a/src/Lookups/lookup_arrays.jl +++ b/src/Lookups/lookup_arrays.jl @@ -32,6 +32,7 @@ Base.first(l::Lookup) = first(parent(l)) Base.last(l::Lookup) = last(parent(l)) Base.firstindex(l::Lookup) = firstindex(parent(l)) Base.lastindex(l::Lookup) = lastindex(parent(l)) +Base.parentindices(l::Lookup) = parentindices(parent(l)) function Base.:(==)(l1::Lookup, l2::Lookup) basetypeof(l1) == basetypeof(l2) && parent(l1) == parent(l2) end @@ -159,11 +160,10 @@ NoLookup() = NoLookup(AutoValues()) rebuild(l::NoLookup; data=parent(l), kw...) = NoLookup(data) # Used in @d broadcasts -struct Length1NoLookup <: AbstractNoLookup end -Length1NoLookup(::AbstractVector) = Length1NoLookup() - -rebuild(l::Length1NoLookup; kw...) = Length1NoLookup() -Base.parent(::Length1NoLookup) = Base.OneTo(1) +struct Length1NoLookup{A<:AbstractUnitRange} <: AbstractNoLookup + data::A +end +Length1NoLookup() = Length1NoLookup(Base.OneTo(1)) """ AbstractSampled <: Aligned @@ -866,6 +866,7 @@ promote_first(x1, x2, xs...) = # Fallback NoLookup if not identical type promote_first(l1::Lookup) = l1 promote_first(l1::L, ls::L...) where L<:Lookup = rebuild(l1; metadata=NoMetadata) +promote_first(l1::L, ls::L...) where L<:AbstractNoLookup = l1 function promote_first(l1::Lookup, ls1::Lookup...) ls = _remove(Length1NoLookup, l1, ls1...) if length(ls) != length(ls1) + 1 diff --git a/src/Lookups/selector.jl b/src/Lookups/selector.jl index 01c6f6d7e..302120d62 100644 --- a/src/Lookups/selector.jl +++ b/src/Lookups/selector.jl @@ -1,3 +1,12 @@ +const StandardIndices = Union{ + AbstractArray{<:Integer}, + Colon, + Integer, + CartesianIndex, + CartesianIndices, + BeginEndRange, +} + struct SelectorError{L,S} <: Exception lookup::L selector::S diff --git a/src/array/array.jl b/src/array/array.jl index 0b776fa67..ec414e5ba 100644 --- a/src/array/array.jl +++ b/src/array/array.jl @@ -1,4 +1,5 @@ const IDim = Dimension{<:StandardIndices} +const MaybeDimTuple = Tuple{Vararg{Dimension}} """ AbstractBasicDimArray <: AbstractArray @@ -8,7 +9,7 @@ returns a `Tuple` of `Dimension` Only keyword `rebuild` is guaranteed to work with `AbstractBasicDimArray`. """ -abstract type AbstractBasicDimArray{T,N,D<:Tuple} <: AbstractArray{T,N} end +abstract type AbstractBasicDimArray{T,N,D<:MaybeDimTuple} <: AbstractArray{T,N} end const AbstractBasicDimVector = AbstractBasicDimArray{T,1} where T const AbstractBasicDimMatrix = AbstractBasicDimArray{T,2} where T @@ -94,10 +95,27 @@ metadata(A::AbstractDimArray) = A.metadata layerdims(A::AbstractDimArray) = basedims(A) -@inline rebuildsliced(A::AbstractBasicDimArray, args...) = rebuildsliced(getindex, A, args...) -@inline function rebuildsliced(f::Function, A::AbstractBasicDimArray, data::AbstractArray, I::Tuple, name=name(A)) +""" + rebuildsliced(f::Function, A::AbstractBasicDimArray, I) + +Rebuild `AbstractDimArray` where `f` is `getindex` , `view` or `dotview`. + +This does not need to be defined for `AbstractDimArray`, as `f` +is simply called on the parent array, dims and refdims are sliced with `slicedims`, +and `rebuild` is called. + +However for custom `AbstractBasicDimArray`, `rebuildsliced` methods are needed +to define slicing behavior, as there not be a parent array. +""" +@propagate_inbounds rebuildsliced(A::AbstractBasicDimArray, args...) = rebuildsliced(getindex, A, args...) +@propagate_inbounds function rebuildsliced(f::Function, A::AbstractDimArray, I::Tuple, name=name(A)) + I1 = to_indices(A, I) + data = f(parent(A), I1...) + return rebuildsliced(f, A, data, I1, name) +end +@propagate_inbounds function rebuildsliced(f::Function, A::AbstractDimArray, data::AbstractArray, I::Tuple, name=name(A)) I1 = to_indices(A, I) - rebuild(A, data, slicedims(f, A, I1)..., name) + return rebuild(A, data, slicedims(f, A, I1)..., name) end # Array interface methods ###################################################### @@ -107,6 +125,7 @@ Base.axes(A::AbstractDimArray) = map(Dimensions.DimUnitRange, axes(parent(A)), d Base.iterate(A::AbstractDimArray, args...) = iterate(parent(A), args...) Base.IndexStyle(A::AbstractDimArray) = Base.IndexStyle(parent(A)) Base.parent(A::AbstractDimArray) = data(A) +Base.parentindices(A::AbstractDimArray) = parentindices(parent(A)) Base.vec(A::AbstractDimArray) = vec(parent(A)) # Only compare data and dim - metadata and refdims can be different Base.:(==)(A1::AbstractDimArray, A2::AbstractDimArray) = @@ -170,14 +189,6 @@ end # An alternative would be to fill missing dims with `Anon`, and keep existing # dims but strip the Lookup? It just seems a little complicated when the methods # below using DimTuple work better anyway. -Base.similar(A::AbstractDimArray, i::Integer, I::Vararg{Integer}; kw...) = - similar(A, eltype(A), (i, I...); kw...) -Base.similar(A::AbstractDimArray, I::Tuple{Int,Vararg{Int}}; kw...) = - similar(A, eltype(A), I; kw...) -Base.similar(A::AbstractDimArray, ::Type{T}, i::Integer, I::Vararg{Integer}; kw...) where T = - similar(A, T, (i, I...); kw...) -Base.similar(A::AbstractDimArray, ::Type{T}, I::Tuple{Int,Vararg{Int}}; kw...) where T = - similar(parent(A), T, I) const MaybeDimUnitRange = Union{Integer,Base.OneTo,Dimensions.DimUnitRange} # when all axes are DimUnitRanges we can return an `AbstractDimArray` @@ -256,14 +267,27 @@ function _similar(::Type{T}, shape::Tuple; kw...) where {T<:AbstractArray} end # With Dimensions we can return an `AbstractDimArray` -Base.similar(A::AbstractBasicDimArray, D::DimTuple; kw...) = Base.similar(A, eltype(A), D; kw...) -Base.similar(A::AbstractBasicDimArray, D::Dimension...; kw...) = Base.similar(A, eltype(A), D; kw...) -Base.similar(A::AbstractBasicDimArray, ::Type{T}, D::Dimension...; kw...) where T = - Base.similar(A, T, D; kw...) +Base.similar(A::AbstractBasicDimArray, d1::Dimension, D::Dimension...; kw...) = + Base.similar(A, eltype(A), (d1, D...); kw...) +Base.similar(A::AbstractBasicDimArray, ::Type{T}, d1::Dimension, D::Dimension...; kw...) where T = + Base.similar(A, T, (d1, D...); kw...) +Base.similar(A::AbstractBasicDimArray, D::DimTuple; kw...) = + Base.similar(A, eltype(A), D; kw...) +function Base.similar(A::AbstractBasicDimArray, ::Type{T}, D::DimTuple; kw...) where T + data = _arraytype(T)(undef, _dimlength(D)) + dimconstructor(D)(data, D; kw...) +end +function Base.similar(A::AbstractBasicDimArray, ::Type{T}, D::Tuple{}; + refdims=(), name=_noname(A), metadata=NoMetadata(), kw... +) where T + data = _arraytype(T)(undef, _dimlength(D)) + dimconstructor(D)(data, (); refdims, name, metadata, kw...) +end + function Base.similar(A::AbstractDimArray, ::Type{T}, D::DimTuple; refdims=(), name=_noname(A), metadata=NoMetadata(), kw... ) where T - data = similar(parent(A), T, _dimlength(D)) + data = _arraytype(T)(undef, _dimlength(D)) dims = _maybestripval(D) return rebuild(A; data, dims, refdims, metadata, name, kw...) end @@ -274,6 +298,20 @@ function Base.similar(A::AbstractDimArray, ::Type{T}, D::Tuple{}; rebuild(A; data, dims=(), refdims, metadata, name, kw...) end +Base.similar(A::AbstractBasicDimArray, shape::Int...; kw...) = + similar(A, eltype(A), shape; kw...) +Base.similar(A::AbstractBasicDimArray, shape::Tuple{Vararg{Int}}; kw...) = + similar(A, eltype(A), shape; kw...) +Base.similar(A::AbstractBasicDimArray, ::Type{T}, shape::Int...; kw...) where T = + similar(A, T, shape; kw...) +Base.similar(A::AbstractBasicDimArray, ::Type{T}, shape::Tuple{Vararg{Int}}; kw...) where T = + _arraytype(T)(undef, shape) +Base.similar(A::AbstractDimArray, ::Type{T}, shape::Tuple{Vararg{Int}}; kw...) where T = + similar(parent(A), T, shape) + +_arraytype(::Type{T}) where T = Array{T} +_arraytype(::Type{Bool}) = BitArray + # Keep the same type in `similar` _noname(A::AbstractBasicDimArray) = _noname(name(A)) _noname(s::String) = "" diff --git a/src/array/broadcast.jl b/src/array/broadcast.jl index b6f6f27d9..30b86383f 100644 --- a/src/array/broadcast.jl +++ b/src/array/broadcast.jl @@ -35,7 +35,9 @@ strict_broadcast!(x::Bool) = STRICT_BROADCAST_CHECKS[] = x # It preserves the dimension names. # `S` should be the `BroadcastStyle` of the wrapped type. # Copied from NamedDims.jl (thanks @oxinabox). -struct DimensionalStyle{S <: BroadcastStyle} <: AbstractArrayStyle{Any} end +struct BasicDimensionalStyle{N} <: AbstractArrayStyle{Any} end + +struct DimensionalStyle{S<:BroadcastStyle} <: AbstractArrayStyle{Any} end DimensionalStyle(::S) where {S} = DimensionalStyle{S}() DimensionalStyle(::S, ::Val{N}) where {S,N} = DimensionalStyle(S(Val(N))) DimensionalStyle(::Val{N}) where N = DimensionalStyle{DefaultArrayStyle{N}}() @@ -53,6 +55,8 @@ function BroadcastStyle(::Type{<:AbstractDimArray{T,N,D,A}}) where {T,N,D,A} inner_style = typeof(BroadcastStyle(A)) return DimensionalStyle{inner_style}() end +BroadcastStyle(::Type{<:AbstractBasicDimArray{T,N}}) where {T,N} = + BasicDimensionalStyle{N}() BroadcastStyle(::DimensionalStyle, ::Base.Broadcast.Unknown) = Unknown() BroadcastStyle(::Base.Broadcast.Unknown, ::DimensionalStyle) = Unknown() @@ -79,12 +83,31 @@ function Broadcast.copy(bc::Broadcasted{DimensionalStyle{S}}) where S dims = format(Dimensions.promotedims(bdims...; skip_length_one=true), data) return rebuild(A; data, dims, refdims=refdims(A), name=Symbol("")) end +function Broadcast.copy(bc::Broadcasted{BasicDimensionalStyle{N}}) where N + A = _firstdimarray(bc) + data = collect(bc) + A isa Nothing && return data # No AbstractDimArray + + bdims = _broadcasted_dims(bc) + _comparedims_broadcast(A, bdims...) + + data isa AbstractArray || return data # result is a scalar + + # Return an AbstractDimArray + dims = format(Dimensions.promotedims(bdims...; skip_length_one=true), data) + return dimconstructor(dims)(data, dims; refdims=refdims(A), name=Symbol("")) +end function Base.copyto!(dest::AbstractArray, bc::Broadcasted{DimensionalStyle{S}}) where S fda = _firstdimarray(bc) isnothing(fda) || _comparedims_broadcast(fda, _broadcasted_dims(bc)...) copyto!(dest, _unwrap_broadcasted(bc)) end +function Base.copyto!(dest::AbstractArray, bc::Broadcasted{BasicDimensionalStyle{N}}) where N + fda = _firstdimarray(bc) + isnothing(fda) || _comparedims_broadcast(fda, _broadcasted_dims(bc)...) + copyto!(dest, bc) +end @inline function Base.Broadcast.materialize!(dest::AbstractDimArray, bc::Base.Broadcast.Broadcasted{<:Any}) # Need to check whether the dims are compatible in dest, @@ -97,7 +120,15 @@ end function Base.similar(bc::Broadcast.Broadcasted{DimensionalStyle{S}}, ::Type{T}) where {S,T} A = _firstdimarray(bc) - rebuildsliced(A, similar(_unwrap_broadcasted(bc), T, axes(bc)...), axes(bc), Symbol("")) + data = similar(_unwrap_broadcasted(bc), T, size(bc)) + dims, refdims = slicedims(A, axes(bc)) + return rebuild(A; data, dims, refdims, name=Symbol("")) +end +function Base.similar(bc::Broadcast.Broadcasted{BasicDimensionalStyle{N}}, ::Type{T}) where {N,T} + A = _firstdimarray(bc) + data = similar(A, T, size(bc)) + dims, refdims = slicedims(A, axes(bc)) + return dimconstructor(dims)(data, dims; refdims, name=Symbol("")) end @@ -383,9 +414,10 @@ _unwrap_broadcasted(boda::BroadcastOptionsDimArray) = parent(parent(boda)) # Get the first dimensional array in the broadcast _firstdimarray(x::Broadcasted) = _firstdimarray(x.args) -_firstdimarray(x::Tuple{<:AbstractDimArray,Vararg}) = x[1] +_firstdimarray(x::Tuple{<:AbstractBasicDimArray,Vararg}) = x[1] +_firstdimarray(x::AbstractBasicDimArray) = x _firstdimarray(ext::Base.Broadcast.Extruded) = _firstdimarray(ext.x) -function _firstdimarray(x::Tuple{<:Broadcasted,Vararg}) +function _firstdimarray(x::Tuple{<:Union{Broadcasted,Base.Broadcast.Extruded},Vararg}) found = _firstdimarray(x[1]) if found isa Nothing _firstdimarray(tail(x)) diff --git a/src/array/indexing.jl b/src/array/indexing.jl index ecce5fbef..68dc41c28 100644 --- a/src/array/indexing.jl +++ b/src/array/indexing.jl @@ -1,112 +1,87 @@ -# getindex/view/setindex! ====================================================== +const SelectorOrStandard = Union{SelectorOrInterval,StandardIndices} +const DimensionIndsArrays = Union{AbstractArray{<:Dimension},AbstractArray{<:DimTuple}} +const DimensionalIndices = Union{DimTuple,DimIndices,DimSelectors,Dimension,DimensionIndsArrays} +const _DimIndicesAmb = Union{AbstractArray{Union{}},DimIndices{<:Integer},DimSelectors{<:Integer}} +const IntegerOrCartesian = Union{Integer,CartesianIndex} -#### getindex/view #### +# getindex/view/setindex! ====================================================== for f in (:getindex, :view, :dotview) _dim_f = Symbol(:_dim_, f) + + # Integer indexing if f === :view - # No indices and we try to rebuild, for 0d - @eval @propagate_inbounds Base.view(A::AbstractDimArray) = rebuild(A, Base.view(parent(A)), ()) # With one Integer and 0d and 1d we try to rebuild - @eval @propagate_inbounds Base.$f(A::AbstractDimArray{<:Any,0}, i::Integer) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), i), (i,)) - @eval @propagate_inbounds Base.$f(A::AbstractDimVector, i::Integer) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), i), (i,)) + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray{<:Any,0}, i::Integer) = + rebuildsliced(Base.$f, A, (i,)) + # One Integer on a vector and we also rebuild + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::Integer) = + rebuildsliced(Base.$f, A, (i,)) + # More Integers and we rebuild + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::Integer, i2::Integer, I::Integer...) = + rebuildsliced(Base.$f, A, (i1, i2, I...)) # Otherwise its linear indexing, don't rebuild - @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i::Integer) = + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::Integer) = Base.$f(parent(A), i) - # More Integer and we rebuild again - @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i1::Integer, i2::Integer, I::Integer...) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), i1, i2, I...), (i1, i2, I...)) - else - @eval @propagate_inbounds Base.$f(A::AbstractDimVector, i::Integer) = Base.$f(parent(A), i) - @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i::Integer) = Base.$f(parent(A), i) - @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i1::Integer, i2::Integer, I::Integer...) = - Base.$f(parent(A), i1, i2, I...) - @eval @propagate_inbounds Base.$f(A::AbstractDimArray) = Base.$f(parent(A)) end @eval begin - @propagate_inbounds Base.$f(A::AbstractDimVector, I::CartesianIndex) = + ### Standard indices + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, I::CartesianIndex) = Base.$f(A, to_indices(A, (I,))...) - @propagate_inbounds Base.$f(A::AbstractDimArray, I::CartesianIndex) = + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, I::CartesianIndex) = Base.$f(A, to_indices(A, (I,))...) - @propagate_inbounds Base.$f(A::AbstractDimVector, I::CartesianIndices) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), I), (I,)) - @propagate_inbounds Base.$f(A::AbstractDimArray, I::CartesianIndices) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), I), (I,)) - @propagate_inbounds function Base.$f(A::AbstractDimVector, i) - x = Base.$f(parent(A), Lookups._construct_types(i)) - if x isa AbstractArray - rebuildsliced(Base.$f, A, x, to_indices(A, (i,))) - else - x - end - end - @propagate_inbounds function Base.$f(A::AbstractDimArray, i1, i2, Is...) - I = Lookups._construct_types(i1, i2, Is...) - x = Base.$f(parent(A), I...) - if x isa AbstractArray - rebuildsliced(Base.$f, A, x, to_indices(A, I)) - else - x - end - end - # Linear indexing forwards to the parent array as it will break the dimensions - @propagate_inbounds Base.$f(A::AbstractDimArray, i::Union{Colon,AbstractArray{<:Integer}}) = - Base.$f(parent(A), i) - # Except 1D DimArrays - @propagate_inbounds Base.$f(A::AbstractDimVector, i::Union{Colon,AbstractArray{<:Integer}}) = - rebuildsliced(Base.$f, A, Base.$f(parent(A), i), (i,)) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::SelectorOrInterval) = + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::IntegerOrCartesian, i2::IntegerOrCartesian, Is::IntegerOrCartesian...) = + Base.$f(A, to_indices(A, (i1, i2, Is...))...) + # 1D DimArrays dont need linear indexing + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::Union{Colon,AbstractArray{<:Integer}}) = + rebuildsliced(Base.$f, A, (i,)) + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, I::CartesianIndices) = rebuildsliced(Base.$f, A, (I,)) + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, I::CartesianIndices) = rebuildsliced(Base.$f, A, (I,)) + @eval @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::StandardIndices, i2::StandardIndices, Is::StandardIndices...) = + rebuildsliced(Base.$f, A, to_indices(A, (i1, i2, Is...))) + + ### Selector/Interval indexing + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::SelectorOrInterval) = Base.$f(A, dims2indices(A, (i,))...) - # Selector/Interval indexing - @propagate_inbounds Base.$f(A::AbstractDimArray, i1::SelectorOrStandard, i2::SelectorOrStandard, I::SelectorOrStandard...) = + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i1::SelectorOrStandard, i2::SelectorOrStandard, I::SelectorOrStandard...) = Base.$f(A, dims2indices(A, (i1, i2, I...))...) + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::Selector{<:Extents.Extent}) = + Base.$f(A, dims2indices(A, i)...) + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::Selector{<:Extents.Extent}) = + Base.$f(A, dims2indices(A, i)...) - @propagate_inbounds Base.$f(A::AbstractDimVector, extent::Union{Extents.Extent,Near{<:Extents.Extent},Touches{<:Extents.Extent}}) = - Base.$f(A, dims2indices(A, extent)...) - @propagate_inbounds Base.$f(A::AbstractDimArray, extent::Union{Extents.Extent,Near{<:Extents.Extent},Touches{<:Extents.Extent}}) = + # Extent indexing + @propagate_inbounds Base.$f(A::AbstractBasicDimVector, extent::Extents.Extent) = Base.$f(A, dims2indices(A, extent)...) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, extent::Union{Extents.Extent,Near{<:Extents.Extent},Touches{<:Extents.Extent}}) = + @propagate_inbounds Base.$f(A::AbstractBasicDimArray, extent::Extents.Extent) = Base.$f(A, dims2indices(A, extent)...) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, extent::Union{Extents.Extent,Near{<:Extents.Extent},Touches{<:Extents.Extent}}) = - Base.$f(A, dims2indices(A, extent)...) - # All Dimension indexing modes combined - @propagate_inbounds Base.$f(A::AbstractBasicDimArray; kw...) = - $_dim_f(A, _simplify_dim_indices(kw2dims(values(kw))...,)...) + + ### Dimension indexing + @propagate_inbounds function Base.$f(A::AbstractBasicDimArray; kw...) + # Need to use one method and check keywords to avoid method overwrites + if isempty(kw) + rebuildsliced(Base.$f, A, ()) + else + $_dim_f(A, _simplify_dim_indices(kw2dims(values(kw))...,)...) + end + end @propagate_inbounds Base.$f(A::AbstractBasicDimArray, d1::DimensionalIndices; kw...) = $_dim_f(A, _simplify_dim_indices(d1, kw2dims(values(kw))...)...) @propagate_inbounds Base.$f(A::AbstractBasicDimArray, d1::DimensionalIndices, d2::DimensionalIndices, D::DimensionalIndices...; kw...) = $_dim_f(A, _simplify_dim_indices(d1, d2, D..., kw2dims(values(kw))...)...) - @propagate_inbounds Base.$f(A::AbstractDimArray, i1::DimensionalIndices, i2::DimensionalIndices, I::DimensionalIndices...) = - $_dim_f(A, _simplify_dim_indices(i1, i2, I...)...) - @propagate_inbounds Base.$f(A::AbstractDimArray, i1::_DimIndicesAmb, i2::_DimIndicesAmb, I::_DimIndicesAmb...) = - $_dim_f(A, _simplify_dim_indices(i1, i2, I...)...) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::DimensionalIndices) = - $_dim_f(A, _simplify_dim_indices(i)...) @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::DimensionalIndices) = $_dim_f(A, _simplify_dim_indices(i)...) - # For ambiguity - @propagate_inbounds Base.$f(A::AbstractDimArray, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArray, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArray, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimVector, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimArray, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractBasicDimVector, i::_DimIndicesAmb) = $_dim_f(A, i) - - # Use underscore methods to minimise ambiguities - @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, ds::DimTuple) = - $_dim_f(A, ds...) + + # All dimension indexing is passed to these underscore methods to minimise ambiguities + @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, ds::DimTuple) = $_dim_f(A, ds...) @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, d1::Dimension, ds::Dimension...) = Base.$f(A, dims2indices(A, (d1, ds...))...) - @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, ds::Dimension...) = - Base.$f(A, dims2indices(A, ds)...) + # Regular non-dimensional indexing + @propagate_inbounds $_dim_f(A::AbstractBasicDimArray, I...) = Base.$f(A, I...) + # Catch the edge case dims were passed but did not match - + # we want to index with all colons [:, :, ...], not [] + @propagate_inbounds $_dim_f(A::AbstractBasicDimArray{<:Any,N}) where N = + rebuildsliced(Base.$f, A, ntuple(i -> Colon(), Val(N))) @propagate_inbounds function $_dim_f( A::AbstractBasicDimArray, d1::Union{Dimension,DimensionIndsArrays}, @@ -114,35 +89,41 @@ for f in (:getindex, :view, :dotview) ) return merge_and_index(Base.$f, A, (d1, ds...)) end - end - # Standard indices - if f == :view - @eval @propagate_inbounds function Base.$f(A::AbstractDimArray, i1::StandardIndices, i2::StandardIndices, I::StandardIndices...) - I = to_indices(A, (i1, i2, I...)) - x = Base.$f(parent(A), I...) - rebuildsliced(Base.$f, A, x, I) - end - else - @eval @propagate_inbounds function Base.$f(A::AbstractDimArray, i1::StandardIndices, i2::StandardIndices, Is::StandardIndices...) - I = to_indices(A, (i1, i2, Is...)) - x = Base.$f(parent(A), I...) - all(i -> i isa Integer, I) ? x : rebuildsliced(Base.$f, A, x, I) + @propagate_inbounds function $_dim_f(A::AbstractBasicDimArray{<:Any,0}, d1::Dimension, ds::Dimension...) + Dimensions._extradimswarn((d1, ds...)) + return rebuildsliced(Base.$f, A, ()) end end + + ##### AbstractDimArray only methods + # Here we know we can just index into the parent object + # Linear indexing forwards to the parent array as it will break the dimensions + # AbstractBasicDimArray must defined their own methods + @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i::Union{Colon,AbstractArray{<:Integer}}) = + Base.$f(parent(A), i) + # Except for AbstractDimVector + @eval @propagate_inbounds Base.$f(A::AbstractDimVector, i::Union{Colon,AbstractArray{<:Integer}}) = + rebuildsliced(Base.$f, A, (i,)) + if f in (:getindex, :dotview) + # We only define getindex with Integer on AbstractDimArray + # AbstractBasicDimArray must defined their own + @eval @propagate_inbounds Base.$f(A::AbstractDimVector, i::Integer) = Base.$f(parent(A), i) + @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i::Integer) = Base.$f(parent(A), i) + @eval @propagate_inbounds Base.$f(A::AbstractDimArray, i1::Integer, i2::Integer, I::Integer...) = + Base.$f(parent(A), i1, i2, I...) + @eval @propagate_inbounds Base.$f(A::AbstractDimArray) = Base.$f(parent(A)) + end # Special case zero dimensional arrays being indexed with missing dims if f == :getindex # Catch this before the dimension is converted to () - @eval @propagate_inbounds function $_dim_f(A::AbstractDimArray{<:Any,0}) - return rebuild(A, fill(A[])) - end - @eval @propagate_inbounds function $_dim_f(A::AbstractDimArray{<:Any,0}, d1::Dimension, ds::Dimension...) + @eval $_dim_f(A::AbstractDimArray{<:Any,0}) = rebuild(A, fill(A[])) + @eval function $_dim_f(A::AbstractDimArray{<:Any,0}, d1::Dimension, ds::Dimension...) Dimensions._extradimswarn((d1, ds...)) return rebuild(A, fill(A[])) end end end - function merge_and_index(f, A, dims) dims, inds_arrays = _separate_dims_arrays(_simplify_dim_indices(dims...)...) # No arrays here, so abort (dispatch is tricky...) @@ -263,3 +244,7 @@ Base.@assume_effects :foldable @inline _simplify_dim_indices() = () view(A, args...; kw...) @propagate_inbounds Base.maybeview(A::AbstractDimArray, args::Vararg{Union{Number,Base.AbstractCartesianIndex}}; kw...) = view(A, args...; kw...) + +# We only own this to_indices dispatch for AbstractBasicDimArray +Base.to_indices(A::AbstractBasicDimArray, inds, (r, args...)::Tuple{<:Type,Vararg}) = + (Lookups._to_index(inds[1], r), to_indices(A, Base.tail(inds), args)...) \ No newline at end of file diff --git a/src/array/methods.jl b/src/array/methods.jl index 27a1cdc35..dec9f8c4f 100644 --- a/src/array/methods.jl +++ b/src/array/methods.jl @@ -90,7 +90,7 @@ end function Base.dropdims(A::AbstractDimArray; dims) dims = DD.dims(A, dims) data = Base.dropdims(parent(A); dims=dimnum(A, dims)) - rebuildsliced(A, data, _dropinds(A, dims)) + rebuildsliced(view, A, data, _dropinds(A, dims)) end @inline _dropinds(A, dims::Tuple) = dims2indices(A, map(d -> rebuild(d, 1), dims)) @@ -582,7 +582,8 @@ end r = axes(A) # Copied from Base.diff r0 = ntuple(i -> i == dims ? UnitRange(1, last(r[i]) - 1) : UnitRange(r[i]), N) - rebuildsliced(A, diff(parent(A); dims=dimnum(A, dims)), r0) + data = diff(parent(A); dims=dimnum(A, dims)) + rebuildsliced(getindex, A, data, r0) end # Forward `replace` to parent objects diff --git a/src/dimindices.jl b/src/dimindices.jl index 5b0779b18..fee1062b2 100644 --- a/src/dimindices.jl +++ b/src/dimindices.jl @@ -1,16 +1,21 @@ +""" + AbstractDimArrayGenerator <: AbstractBasicDimArray +Abstract supertype for all AbstractBasicDimArrays that +generate their `data` on demand during `getindex`. +""" abstract type AbstractDimArrayGenerator{T,N,D} <: AbstractBasicDimArray{T,N,D} end dims(dg::AbstractDimArrayGenerator) = dg.dims +# Dims that contribute to the element type. +# May be larger than `dims` after slicing +eldims(di::AbstractDimArrayGenerator) = dims((dims(di)..., refdims(di)...), orderdims(di)) +eldims(di::AbstractDimArrayGenerator, d) = dims(eldims(di), d) + Base.size(dg::AbstractDimArrayGenerator) = map(length, dims(dg)) Base.axes(dg::AbstractDimArrayGenerator) = map(d -> axes(d, 1), dims(dg)) -Base.similar(A::AbstractDimArrayGenerator, ::Type{T}, D::DimTuple) where T = - dimconstructor(D)(A; data=similar(Array{T}, size(D)), dims=D, refdims=(), metadata=NoMetadata()) -Base.similar(A::AbstractDimArrayGenerator, ::Type{T}, D::Tuple{}) where T = - dimconstructor(D)(A; data=similar(Array{T}, ()), dims=(), refdims=(), metadata=NoMetadata()) - @inline Base.permutedims(A::AbstractDimArrayGenerator{<:Any,2}) = rebuild(A; dims=reverse(dims(A))) @inline Base.permutedims(A::AbstractDimArrayGenerator{<:Any,1}) = @@ -25,7 +30,34 @@ end rebuild(A; dims=dims(dims(A), Tuple(perm))) end -abstract type AbstractDimIndices{T,N,D} <: AbstractDimArrayGenerator{T,N,D} end +""" + AbstractRebuildableDimArrayGenerator <: AbstractDimArrayGenerator + +Abstract supertype for all AbstractDimArrayGenerator that +can be rebuilt when subsetted with `view` or `getindex`. + +These arrays must have `dims` and `refdims` fields that defined the data +They do not need to define `rebuildsliced` methods as this is defined +as simply doing `slicedims` on `dims` and `refdims` and rebuilding. +""" +abstract type AbstractRebuildableDimArrayGenerator{T,N,D,R<:MaybeDimTuple} <: AbstractDimArrayGenerator{T,N,D} end + +refdims(A::AbstractRebuildableDimArrayGenerator) = A.refdims + +_refdims_firsts(A::AbstractRebuildableDimArrayGenerator) = map(d -> rebuild(d, first(d)), refdims(A)) + +# Custom rebuildsliced where data is ignored, and just dims and refdims are slices +# This makes sense for AbstractRebuildableDimArrayGenerator because Arrays are +# generated in getindex from the dims/refdims combination. +# `f` is ignored, and views are always used +@propagate_inbounds function rebuildsliced(f::Function, A::AbstractRebuildableDimArrayGenerator, I) + dims, refdims = slicedims(view, A, I) + return rebuild(A; dims, refdims) +end + +abstract type AbstractDimIndices{T,N,D,R,O<:MaybeDimTuple} <: AbstractRebuildableDimArrayGenerator{T,N,D,R} end + +orderdims(di::AbstractDimIndices) = di.orderdims """ DimIndices <: AbstractArray @@ -85,48 +117,52 @@ julia> A[di] # Index A with these indices 0.6 0.745673 0.692209 ``` """ -struct DimIndices{T,N,D<:Tuple{Vararg{Dimension}}} <: AbstractDimIndices{T,N,D} +struct DimIndices{T<:MaybeDimTuple,N,D,R,O} <: AbstractDimIndices{T,N,D,R,O} dims::D + refdims::R + orderdims::O # Manual inner constructor for ambiguity only - function DimIndices{T,N,D}(dims::Tuple{Vararg{Dimension}}) where {T,N,D<:Tuple{Vararg{Dimension}}} - new{T,N,D}(dims) + function DimIndices(dims::D, refdims::R, orderdims::O) where {D<:MaybeDimTuple,R<:MaybeDimTuple,O<:MaybeDimTuple} + eldims = DD.dims((dims..., refdims...), orderdims) + T = typeof(map(d -> rebuild(d, 1), eldims)) + N = length(dims) + new{T,N,D,R,O}(dims, refdims, orderdims) end end -function DimIndices(dims::D) where {D<:Tuple{Vararg{Dimension}}} - T = typeof(map(d -> rebuild(d, 1), dims)) - N = length(dims) - dims = N > 0 ? _dimindices_format(dims) : dims - DimIndices{T,N,typeof(dims)}(dims) +function DimIndices(dims::MaybeDimTuple) + dims = length(dims) > 0 ? _dimindices_format(dims) : dims + return DimIndices(dims, (), basedims(dims)) end DimIndices(x) = DimIndices(dims(x)) DimIndices(dim::Dimension) = DimIndices((dim,)) DimIndices(::Nothing) = throw(ArgumentError("Object has no `dims` method")) # Forces multiple indices not linear -function Base.getindex(di::DimIndices, i1::Integer, i2::Integer, I::Integer...) - map(dims(di), (i1, i2, I...)) do d, i +function Base.getindex(A::DimIndices, i1::Integer, i2::Integer, I::Integer...) + dis = map(dims(A), (i1, i2, I...)) do d, i rebuild(d, d[i]) end + dims((dis..., _refdims_firsts(A)...), orderdims(A)) end # Dispatch to avoid linear indexing in multidimensional DimIndices -function Base.getindex(di::DimIndices{<:Any,1}, i::Integer) - d = dims(di, 1) - (rebuild(d, d[i]),) +function Base.getindex(A::DimIndices{<:Any,1}, i::Integer) + d = dims(A, 1) + di = rebuild(d, d[i]) + return dims((di, _refdims_firsts(A)...), orderdims(A)) end +Base.getindex(A::DimIndices{<:Any,0}) = dims(_refdims_firsts(A), orderdims(A)) _dimindices_format(dims::Tuple{}) = () _dimindices_format(dims::Tuple) = map(rebuild, dims, map(_dimindices_axis, dims)) -# Allow only CartesianIndices arguments _dimindices_axis(x::Integer) = Base.OneTo(x) _dimindices_axis(x::AbstractRange{<:Integer}) = x -# And Lookup, which we take the axes from _dimindices_axis(x::Dimension) = _dimindices_axis(val(x)) _dimindices_axis(x::Lookup) = axes(x, 1) _dimindices_axis(x) = throw(ArgumentError("`$x` is not a valid input for `DimIndices`. Use `Dimension`s wrapping `Integer`, `AbstractArange{<:Integer}`, or a `Lookup` (the `axes` will be used)")) -abstract type AbstractDimVals{T,N,D} <: AbstractDimIndices{T,N,D} end +abstract type AbstractDimVals{T,N,D,R,O} <: AbstractDimIndices{T,N,D,R,O} end (::Type{T})(::Nothing; kw...) where T<:AbstractDimVals = throw(ArgumentError("Object has no `dims` method")) (::Type{T})(x; kw...) where T<:AbstractDimVals = T(dims(x); kw...) @@ -150,34 +186,40 @@ that defines a `dims` method can be passed in. - `order`: determines the order of the points, the same as the order of `dims` by default. """ -struct DimPoints{T,N,D<:Tuple{Vararg{Dimension}},O} <: AbstractDimVals{T,N,D} +struct DimPoints{T<:Tuple,N,D,R,O} <: AbstractDimVals{T,N,D,R,O} dims::D - order::O + refdims::R + orderdims::O + function DimPoints(dims::D, refdims::R, orderdims::O) where {D<:MaybeDimTuple,R<:MaybeDimTuple,O<:MaybeDimTuple} + eldims = DD.dims((dims..., refdims...), orderdims) + T = Tuple{map(eltype, eldims)...} + N = length(dims) + new{T,N,D,R,O}(dims, refdims, orderdims) + end end DimPoints(dims::Tuple; order=dims) = DimPoints(dims, order) function DimPoints(dims::Tuple, order::Tuple) - order = map(d -> basetypeof(d)(), order) - T = Tuple{map(eltype, dims)...} - N = length(dims) - dims = N > 0 ? _format(dims) : dims - DimPoints{T,N,typeof(dims),typeof(order)}(dims, order) + dims = length(dims) > 0 ? format(dims) : dims + DimPoints(dims, (), basedims(order)) end -function Base.getindex(dp::DimPoints, i1::Integer, i2::Integer, I::Integer...) +function Base.getindex(A::DimPoints, i1::Integer, i2::Integer, I::Integer...) # Get dim-wrapped point values at i1, I... - pointdims = map(dims(dp), (i1, i2, I...)) do d, i + pointdims = map(dims(A), (i1, i2, I...)) do d, i rebuild(d, d[i]) end # Return the unwrapped point sorted by `order - return map(val, DD.dims(pointdims, dp.order)) + return map(val, DD.dims((pointdims..., _refdims_firsts(A)...), orderdims(A))) end -Base.getindex(di::DimPoints{<:Any,1}, i::Integer) = (dims(di, 1)[i],) - -_format(::Tuple{}) = () -function _format(dims::Tuple) - ax = map(d -> axes(val(d), 1), dims) - return format(dims, ax) +function Base.getindex(A::DimPoints{<:Any,1}, i::Integer) + # Get dim-wrapped point values at i1, I... + d1 = dims(A, 1) + pointdim = rebuild(d1, d1[i]) + # Return the unwrapped point sorted by `order + D = dims((pointdim, _refdims_firsts(A)...), orderdims(A)) + return map(val, D) end +Base.getindex(A::DimPoints{<:Any,0}) = map(val, dims(_refdims_firsts(A), orderdims(A))) """ DimSelectors <: AbstractArray @@ -226,19 +268,49 @@ Using `At` would make sure we only use exact interpolation, while `Contains` with sampling of `Intervals` would make sure that each values is taken only from an Interval that is present in the lookups. """ -struct DimSelectors{T,N,D<:Tuple{Vararg{Dimension}},S<:Tuple} <: AbstractDimVals{T,N,D} +struct DimSelectors{T<:MaybeDimTuple,N,D,R,O,S<:Tuple} <: AbstractDimVals{T,N,D,R,O} dims::D + refdims::R + orderdims::O selectors::S + function DimSelectors(dims::D, refdims::R, orderdims::O, selectors::S) where {D<:Tuple,R<:Tuple,O<:Tuple,S<:Tuple} + eldims = DD.dims((dims..., refdims...), orderdims) + T = _selector_eltype(eldims, selectors) + N = length(dims) + new{T,N,D,R,O,S}(dims, refdims, orderdims, selectors) + end end -function DimSelectors(dims::Tuple{Vararg{Dimension}}; atol=nothing, selectors=At()) +function DimSelectors(dims::MaybeDimTuple; atol=nothing, selectors=At()) s = _format_selectors(dims, selectors, atol) DimSelectors(dims, s) end -function DimSelectors(dims::Tuple{Vararg{Dimension}}, selectors::Tuple) - T = _selector_eltype(dims, selectors) - N = length(dims) - dims = N > 0 ? _format(dims) : dims - DimSelectors{T,N,typeof(dims),typeof(selectors)}(dims, selectors) +function DimSelectors(dims::MaybeDimTuple, selectors::Tuple) + dims = length(dims) > 0 ? format(dims) : dims + orderdims = basedims(dims) + refdims = () + length(dims) == length(selectors) || throw(ArgumentError("`length(dims) must match `length(selectors)`, got $(length(dims)) and $(length(selectors))")) + DimSelectors(dims, refdims, orderdims, selectors) +end + +@propagate_inbounds function Base.getindex(A::DimSelectors, i1::Integer, i2::Integer, I::Integer...) + D = map(dims(A), (i1, i2, I...)) do d, i + rebuild(d, d[i]) + end + return _rebuild_selectors(A, D) +end +@propagate_inbounds function Base.getindex(A::DimSelectors{<:Any,1}, i::Integer) + d1 = dims(A, 1) + d = rebuild(d1, d1[i]) + return _rebuild_selectors(A, (d,)) +end +@propagate_inbounds Base.getindex(A::DimSelectors{<:Any,0}) = + _rebuild_selectors(A, ()) + +function _rebuild_selectors(A, D) + sorteddims = dims((D..., _refdims_firsts(A)...), orderdims(A)) + map(sorteddims, A.selectors) do d, s + rebuild(d, rebuild(s; val=val(d))) + end end _selector_eltype(dims::Tuple, selectors::Tuple) = @@ -268,8 +340,7 @@ end _format_selectors(dims, selectors, map(_ -> atol, dims)) @inline _format_selectors(dims::Tuple, selectors::Tuple, atol::Tuple) = map(_format_selectors, dims, selectors, atol) - -_format_selectors(d::Dimension, T::Type, atol) = _format_selectors(d, T(), atol) +@inline _format_selectors(d::Dimension, T::Type, atol) = _format_selectors(d, T(), atol) @inline _format_selectors(d::Dimension, ::Near, atol) = Near(nothing) @inline _format_selectors(d::Dimension, ::Contains, atol) = Contains(nothing) @inline function _format_selectors(d::Dimension, at::At, atol) @@ -282,68 +353,127 @@ _atol(T::Type{<:AbstractFloat}, atol, ::Nothing) = atol _atol(T::Type{<:AbstractFloat}, ::Nothing, atol) = atol _atol(T::Type{<:AbstractFloat}, ::Nothing, ::Nothing) = eps(T) -@propagate_inbounds function Base.getindex(di::DimSelectors, i1::Integer, i2::Integer, I::Integer...) - map(dims(di), di.selectors, (i1, i2, I...)) do d, s, i - rebuild(d, rebuild(s; val=d[i])) # At selector with the value at i - end -end -@propagate_inbounds function Base.getindex(di::DimSelectors{<:Any,1}, i::Integer) - d = dims(di, 1) - (rebuild(d, rebuild(di.selectors[1]; val=d[i])),) -end - # Deprecated const DimKeys = DimSelectors -struct DimSlices{T,N,D<:Tuple{Vararg{Dimension}},P} <: AbstractDimArrayGenerator{T,N,D} +const SliceDim = Dimension{<:Union{<:AbstractVector{Int},<:AbstractVector{<:AbstractVector{Int}}}} + +""" + DimSlices <: AbstractRebuildableDimArrayGenerator + + DimSlices(x, dims; drop=true) + +A `Base.Slices` like object for returning view slices from a DimArray. + +This is used for `eachslice` on stacks. + +`dims` must be a `Tuple` of `Dimension` holding `AbstractVector{Int}` +or `AbstractVector{<:AbstractVector{Int}}`. + +# Keywords + +- `drop`: whether to drop dimensions from the outer array or keep the + same dimensions as the inner view, but with length 1. +""" +struct DimSlices{T,N,D,R,P,U} <: AbstractRebuildableDimArrayGenerator{T,N,D,R} _data::P dims::D + refdims::R + reduced::U end DimSlices(x; dims, drop=true) = DimSlices(x, dims; drop) -function DimSlices(x, dims; drop=true) - newdims = if length(dims) == 0 - map(d -> rebuild(d, :), DD.dims(x)) +DimSlices(x, dim; kw...) = DimSlices(x, (dim,); kw...) +function DimSlices(x, dims::Tuple; drop::Union{Bool,Nothing}=nothing) + dims = DD.dims(x, dims) + refdims = () + inds = if length(dims) == 0 + map(d -> rebuild(d, :), DD.dims(x)) + else + map(d -> rebuild(d, firstindex(d)), dims) + end + slicedims, reduced = if isnothing(drop) || drop + # We have to handle filling in colons for no dims because passing + # no dims at all is owned by base to mean A[] not A[D1(:), D2(:), D3(:)] + dims, () else - dims - end - inds = map(newdims) do d - rebuild(d, first(d)) - end - # `getindex` returns these views + # Get other dimensions as length 1 + reduced = map(otherdims(x, dims)) do o + reducedims(o) + end + # Re-sort to x dim order + slicedims = DD.dims((reduced..., dims...), DD.dims(x)) + sliceddims, basedims(reduced) + end T = typeof(view(x, inds...)) - N = length(newdims) - D = typeof(newdims) - P = typeof(x) - return DimSlices{T,N,D,P}(x, newdims) + N = length(slicedims) + D = typeof(slicedims) + R = typeof(refdims) + A = typeof(x) + U = typeof(reduced) + return DimSlices{T,N,D,R,A,U}(x, slicedims, refdims, reduced) end -rebuild(ds::A; dims) where {A<:DimSlices{T,N}} where {T,N} = - DimSlices{T,N,typeof(dims),typeof(ds._data)}(ds._data, dims) +function rebuild(ds::DimSlices{T,N}; + dims::D, refdims::R, reduced::U=ds.reduced +) where {T,N,D,R,U} + A = typeof(ds._data) + DimSlices{T,N,D,R,A,U}(ds._data, dims, refdims, reduced) +end +@propagate_inbounds function rebuildsliced(::Function, A::DimSlices, I) + @boundscheck checkbounds(A, I...) + # We use `unafe_view` to force always wrapping as a view, even for ranges + # Then in `_refdims_firsts` we can use `first(parentindices(d))` to get the offset + dims, refdims = slicedims(Base.unsafe_view, A, I) + return rebuild(A; dims, refdims) +end + +# We need to get the vist index from the view, so define this custom for DimSlices +_refdims_firsts(A::DimSlices) = map(d -> rebuild(d, first(parentindices(d))), refdims(A)) function Base.summary(io::IO, A::DimSlices{T,N}) where {T,N} print_ndims(io, size(A)) print(io, string(nameof(typeof(A)), "{$(nameof(T)),$N}")) end -@propagate_inbounds function Base.getindex(ds::DimSlices, i1::Integer, i2::Integer, Is::Integer...) +@propagate_inbounds function Base.getindex(A::DimSlices, i1::Integer, i2::Integer, Is::Integer...) I = (i1, i2, Is...) - @boundscheck checkbounds(ds, I...) - D = map(dims(ds), I) do d, i - rebuild(d, d[i]) + D = map(dims(A), I) do d, i + i1 = if hasdim(A.reduced, d) + @boundscheck checkbounds(d, i) + Colon() + else + eachindex(d)[i] + end + return rebuild(d, i1) end - return view(ds._data, D...) + R = _refdims_firsts(A) + return view(A._data, D..., R...) end # Dispatch to avoid linear indexing in multidimensional DimIndices -@propagate_inbounds function Base.getindex(ds::DimSlices{<:Any,1}, i::Integer) - d = dims(ds, 1) - return view(ds._data, rebuild(d, d[i])) +@propagate_inbounds function Base.getindex(A::DimSlices{<:Any,1}, i::Integer) + d1 = dims(A, 1) + d = if hasdim(A.reduced, d1) + @boundscheck checkbounds(d1, i) + rebuild(d1, :) + else + rebuild(d1, eachindex(d1)[i]) + end + return view(A._data, d, _refdims_firsts(A)...) +end +@propagate_inbounds function Base.getindex(A::DimSlices{<:Any,0}) + R = _refdims_firsts(A) + # Need to manually force the Colons in case there are no dims at all + D = map(otherdims(A._data, R)) do d + rebuild(d, :) + end + view(A._data, D..., R...) end # Extends the dimensions of any `AbstractBasicDimArray` # as if the array assigned into a larger array across all dimensions, # but without the copying. Theres is a cost for linear indexing these objects # as we need to convert to Cartesian. -struct DimExtensionArray{T,N,D<:Tuple{Vararg{Dimension}},R<:Tuple{Vararg{Dimension}},A<:AbstractBasicDimArray{T}} <: AbstractDimArrayGenerator{T,N,D} +struct DimExtensionArray{T,N,D<:MaybeDimTuple,R<:MaybeDimTuple,A<:AbstractBasicDimArray{T}} <: AbstractDimArrayGenerator{T,N,D} _data::A dims::D refdims::R @@ -361,71 +491,41 @@ DimExtensionArray(A::AbstractBasicDimArray, dims::Tuple; refdims=refdims(A)) = name(A::DimExtensionArray) = name(A._data) metadata(A::DimExtensionArray) = metadata(A._data) -# Indexing that returns a new object with the same number of dims -for f in (:getindex, :dotview, :view) - __f = Symbol(:__, f) - T = Union{Colon,AbstractRange} - # For ambiguity - @eval @propagate_inbounds function Base.$f(de::DimExtensionArray{<:Any,1}, i::Integer) - if ndims(parent(de)) == 0 - $f(de._data) - else - $f(de._data, i) - end - end - @eval @propagate_inbounds function Base.$f(di::DimExtensionArray{<:Any,1}, i::Union{AbstractRange,Colon}) - rebuild(di; _data=di.data[i], dims=(dims(di, 1)[i],)) - end - # For ambiguity - @eval @propagate_inbounds function Base.$f(de::DimExtensionArray, i1::$T, i2::$T, Is::$T...) - $__f(de, i1, i2, Is...) - end - @eval @propagate_inbounds function Base.$f(de::DimExtensionArray, i1::StandardIndices, i2::StandardIndices, Is::StandardIndices...) - $__f(de, i1, i2, Is...) - end - @eval @propagate_inbounds function Base.$f( - de::DimensionalData.DimExtensionArray, - i1::Union{AbstractArray{Union{}}, DimensionalData.DimIndices{<:Integer}, DimensionalData.DimSelectors{<:Integer}}, - i2::Union{AbstractArray{Union{}}, DimensionalData.DimIndices{<:Integer}, DimensionalData.DimSelectors{<:Integer}}, - Is::Vararg{Union{AbstractArray{Union{}}, DimensionalData.DimIndices{<:Integer}, DimensionalData.DimSelectors{<:Integer}}} - ) - $__f(de, i1, i2, Is...) - end - @eval Base.@assume_effects :foldable @propagate_inbounds function $__f(de::DimExtensionArray, i1, i2, Is...) - I = (i1, i2, Is...) - newdims, newrefdims = slicedims(dims(de), refdims(de), I) - D = map(rebuild, dims(de), I) - A = de._data - realdims = dims(D, dims(A)) - if all(map(d -> val(d) isa Colon, realdims)) - rebuild(de; dims=newdims, refdims=newrefdims) - else - newrealparent = begin - x = parent(A)[dims2indices(A, realdims)...] - x isa AbstractArray ? x : fill(x) - end - newrealdims = dims(newdims, realdims) - newdata = rebuild(A; data=newrealparent, dims=newrealdims) - rebuild(de; _data=newdata, dims=newdims, refdims=newrefdims) +@propagate_inbounds function rebuildsliced(f::Function, de::DimExtensionArray, I) + newdims, newrefdims = slicedims(dims(de), refdims(de), I) + D = map(rebuild, dims(de), I) + A = de._data + realdims = dims(D, dims(A)) + if all(map(d -> val(d) isa Colon, realdims)) + rebuild(de; dims=newdims, refdims=newrefdims) + else + newrealparent = begin + x = f(parent(A), dims2indices(A, realdims)...) + x isa AbstractArray ? x : fill(x) end - end - @eval @propagate_inbounds function $__f(de::DimExtensionArray{<:Any,1}, i::$T) - newdims, _ = slicedims(dims(de), (i,)) - A = de._data - D = rebuild(only(dims(de)), i) - rebuild(de; dims=newdims, _data=A[D...]) + newrealdims = dims(newdims, realdims) + newdata = rebuild(A; data=newrealparent, dims=newrealdims) + rebuild(de; _data=newdata, dims=newdims, refdims=newrefdims) end end -for f in (:getindex, :dotview) - __f = Symbol(:__, f) - @eval function $__f(de::DimExtensionArray, i1::Int, i2::Int, Is::Int...) - D = map(rebuild, dims(de), (i1, i2, Is...)) - A = de._data - return $f(A, dims(D, dims(A))...) - end - @eval $__f(de::DimExtensionArray{<:Any,1}, i::Int) = $f(de._data, rebuild(dims(de, 1), i)) +@propagate_inbounds function rebuildsliced( + f::Function, de::DimExtensionArray{<:Any,1}, I::Tuple{<:Union{Colon,AbstractRange}} +) + newdims, _ = slicedims(dims(de), I) + A = de._data + D = rebuild(only(dims(de)), only(I)) + rebuild(de; dims=newdims, _data=A[D...]) end +# Integer indexing +function Base.getindex(de::DimExtensionArray, i1::Integer, i2::Integer, Is::Integer...) + D = map(rebuild, dims(de), (i1, i2, Is...)) + A = de._data + return getindex(A, dims(D, dims(A))...) +end +Base.getindex(de::DimExtensionArray{<:Any,1}, i::Integer) = getindex(de._data, rebuild(dims(de, 1), i)) +Base.getindex(de::DimExtensionArray{<:Any,0}) = de._data[] + function mergedims(A::DimExtensionArray, dim_pairs::Pair...) all_dims = dims(A) dims_new = mergedims(all_dims, dim_pairs...) @@ -434,47 +534,4 @@ function mergedims(A::DimExtensionArray, dim_pairs::Pair...) Aperm = PermutedDimsArray(A, dims_perm) data_merged = reshape(parent(Aperm), map(length, dims_new)) return DimArray(data_merged, dims_new) -end - -const SelectorOrStandard = Union{SelectorOrInterval,StandardIndices} -const DimensionIndsArrays = Union{AbstractArray{<:Dimension},AbstractArray{<:DimTuple}} -const DimensionalIndices = Union{DimTuple,DimIndices,DimSelectors,Dimension,DimensionIndsArrays} -const _DimIndicesAmb = Union{AbstractArray{Union{}},DimIndices{<:Integer},DimSelectors{<:Integer}} - -# Indexing that returns a new object with the same number of dims -for f in (:getindex, :dotview, :view) - T = Union{Colon,AbstractVector} - _dim_f = Symbol(:_dim_, f) - @eval begin - @propagate_inbounds function Base.$f(di::AbstractDimArrayGenerator, i1::$T, i2::$T, Is::$T...) - I = (i1, i2, Is...) - newdims, _ = slicedims(dims(di), I) - rebuild(di; dims=newdims) - end - @propagate_inbounds function Base.$f( - di::AbstractDimArrayGenerator, - i1::DimensionalIndices, - i2::DimensionalIndices, - Is::DimensionalIndices... - ) - $_dim_f(di, i1, i2, Is...) - end - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator, i::DimensionalIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator{<:Any,1}, i::DimIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator{<:Any,1}, i::DimSelectors) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator{<:Any,1}, i::DimensionalIndices) = $_dim_f(A, i) - @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator{<:Any,1}, i::_DimIndicesAmb) = $_dim_f(A, i) - @propagate_inbounds Base.$f(di::AbstractDimArrayGenerator{<:Any,1}, i::$T) = - rebuild(di; dims=(dims(di, 1)[i],)) - @propagate_inbounds Base.$f(dg::AbstractDimArrayGenerator, i::Integer) = - Base.$f(dg, Tuple(CartesianIndices(dg)[i])...) - end - if f == :view - @eval @propagate_inbounds Base.$f(A::AbstractDimArrayGenerator) = A - else - @eval @propagate_inbounds Base.$f(::AbstractDimArrayGenerator) = () - end -end +end \ No newline at end of file diff --git a/src/groupby.jl b/src/groupby.jl index bfcb63cdd..a73e5a9ca 100644 --- a/src/groupby.jl +++ b/src/groupby.jl @@ -11,24 +11,47 @@ This wrapper allows for specialisations on later broadcast or reducing operations, e.g. for chunk reading with DiskArrays.jl, because we know the data originates from a single array. """ -struct DimGroupByArray{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me} <: AbstractDimArray{T,N,D,A} - data::A +struct DimGroupByArray{T,N,D,R,A,Na,Me} <: AbstractRebuildableDimArrayGenerator{T,N,D,R} + _data::A dims::D refdims::R name::Na metadata::Me - function DimGroupByArray( - data::A, dims::D, refdims::R, name::Na, metadata::Me - ) where {D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me} where {T,N} - checkdims(data, dims) - new{T,N,D,R,A,Na,Me}(data, dims, refdims, name, metadata) - end +end +function DimGroupByArray(_data::A, dims::D, refdims::R, name::Na, metadata::Me) where {D,R,A,Na,Me} + T = typeof(view(_data, firstgroupinds(dims)...)) + N = length(dims) + DimGroupByArray{T,N,D,R,A,Na,Me}(_data, dims, refdims, name, metadata) end function DimGroupByArray(data::AbstractArray, dims::Union{Tuple,NamedTuple}; refdims=(), name=NoName(), metadata=NoMetadata() ) DimGroupByArray(data, format(dims, data), refdims, name, metadata) end + +name(A::DimGroupByArray) = A.name +metadata(A::DimGroupByArray) = A.metadata +refdims(A::DimGroupByArray) = A.refdims + +function groupinds(A::DimGroupByArray, I::Integer...) + # Get the group indices for each dimension + D = groupinds(dims(A), I...) + # And the indices in refdims + R = map(refdims(A)) do d + rebuild(d, only(hiddenparent(d))) + end + # And combine them, dim indexing will fix the order later + return (D..., R...) +end +function groupinds(dims::MaybeDimTuple, I::Integer...) + map(dims, I) do d, i + rebuild(d, hiddenparent(d)[i]) + end +end + +firstgroupinds(dims::MaybeDimTuple) = + groupinds(dims, ntuple(_ -> 1, length(dims))...) + @inline function rebuild( A::DimGroupByArray, data::AbstractArray, dims::Tuple, refdims::Tuple, name, metadata ) @@ -41,6 +64,18 @@ end rebuild(A, data, dims, refdims, name, metadata) # Rebuild as a regular DimArray end +Base.size(A::DimGroupByArray) = map(length, dims(A)) + +@propagate_inbounds function Base.getindex(A::DimGroupByArray, i1::Integer, i2::Integer, Is::Integer...) + I = (i1, i2, Is...) + return view(A._data, groupinds(A, I...)...) +end +# Dispatch to avoid linear indexing in multidimensional DimIndices +@propagate_inbounds Base.getindex(A::DimGroupByArray{<:Any,1}, i::Integer) = + view(A._data, groupinds(A, i)...) +@propagate_inbounds Base.getindex(A::DimGroupByArray{<:Any,0}) = + view(A._data, groupinds(A)...) + function Base.summary(io::IO, A::DimGroupByArray{T,N}) where {T<:AbstractArray{T1,N1},N} where {T1,N1} print_ndims(io, size(A)) print(io, string(nameof(typeof(A)), "{$(nameof(T)){$T1,$N1},$N}")) @@ -83,18 +118,6 @@ function Base.show(io::IO, s::DimSummariser) end Base.alignment(io::IO, s::DimSummariser) = (textwidth(sprint(show, s)), 0) -# An array that doesn't know what it holds, to simplify dispatch -# It can also hold something that is not an AbstractArray itself. -struct OpaqueArray{T,N,P} <: AbstractArray{T,N} - parent::P -end -OpaqueArray(A::P) where P<:AbstractArray{T,N} where {T,N} = OpaqueArray{T,N,P}(A) -OpaqueArray(st::P) where P<:AbstractDimStack{<:Any,T,N} where {T,N} = OpaqueArray{T,N,P}(st) - -Base.size(A::OpaqueArray) = size(A.parent) -Base.getindex(A::OpaqueArray, args...) = Base.getindex(A.parent, args...) -Base.setindex!(A::OpaqueArray, args...) = Base.setindex!(A.parent, args...) - abstract type AbstractBins <: Function end @@ -252,6 +275,7 @@ julia> using DimensionalData, Dates julia> A = rand(X(1:0.1:20), Y(1:20), Ti(DateTime(2000):Day(3):DateTime(2003))); julia> groups = groupby(A, Ti => month) # Group by month +metadata = Dict{Symbol, Any}(:groupby => (:Ti => Dates.month)) ┌ 12-element DimGroupByArray{DimArray{Float64,3},1} ┐ ├───────────────────────────────────────────────────┴───────────── dims ┐ ↓ Ti Sampled{Int64} [1, 2, …, 11, 12] ForwardOrdered Irregular Points @@ -276,15 +300,16 @@ julia> groupmeans = mean.(groups) # Take the monthly mean ┌ 12-element DimArray{Float64, 1} ┐ ├─────────────────────────────────┴─────────────────────────────── dims ┐ ↓ Ti Sampled{Int64} [1, 2, …, 11, 12] ForwardOrdered Irregular Points -├───────────────────────────────────────────────────────────── metadata ┤ - Dict{Symbol, Any} with 1 entry: - :groupby => :Ti=>month └───────────────────────────────────────────────────────────────────────┘ 1 0.500064 2 0.499762 3 0.500083 4 0.499985 - ⋮ + 5 0.500511 + 6 0.500042 + 7 0.500003 + 8 0.500257 + 9 0.500868 10 0.500874 11 0.498704 12 0.50047 @@ -296,24 +321,29 @@ match after application of `mean`. ```jldoctest groupby julia> map(.-, groupby(A, Ti=>month), mean.(groupby(A, Ti=>month), dims=Ti)); +metadata = Dict{Symbol, Any}(:groupby => (:Ti => Dates.month)) +metadata = Dict{Symbol, Any}(:groupby => (:Ti => Dates.month)) ``` +ups Or do something else with Y: ```jldoctest groupby julia> groupmeans = mean.(groupby(A, Ti=>month, Y=>isodd)) +metadata = Dict{Symbol, Any}(:groupby => (:Ti => Dates.month, :Y => isodd)) ┌ 12×2 DimArray{Float64, 2} ┐ ├───────────────────────────┴────────────────────────────────────── dims ┐ ↓ Ti Sampled{Int64} [1, 2, …, 11, 12] ForwardOrdered Irregular Points, → Y Sampled{Bool} [false, true] ForwardOrdered Irregular Points -├────────────────────────────────────────────────────────────── metadata ┤ - Dict{Symbol, Any} with 1 entry: - :groupby => (:Ti=>month, :Y=>isodd) └────────────────────────────────────────────────────────────────────────┘ ↓ → false true 1 0.499594 0.500533 2 0.498145 0.501379 + 3 0.499871 0.500296 + 4 0.500921 0.49905 ⋮ + 8 0.499599 0.500915 + 9 0.500715 0.501021 10 0.501105 0.500644 11 0.498606 0.498801 12 0.501643 0.499298 @@ -329,28 +359,43 @@ function DataAPI.groupby( end return groupby(A, dims) end -function DataAPI.groupby(A::DimArrayOrStack, dimfuncs::DimTuple) +function DataAPI.groupby(A::DimArrayOrStack, dimfuncs::DimTuple; name=:groupby) length(otherdims(dimfuncs, dims(A))) > 0 && Dimensions._extradimserror(otherdims(dimfuncs, dims(A))) # Get groups for each dimension - dim_groups_indices = map(dimfuncs) do d + group_dims = map(dimfuncs) do d _group_indices(dims(A, d), DD.val(d)) end - # Separate lookups dims from indices - group_dims = map(first, dim_groups_indices) - # Get indices for each group wrapped with dims for indexing - indices = map(rebuild, group_dims, map(last, dim_groups_indices)) - - # Hide that the parent is a DimSlices - views = OpaqueArray(DimSlices(A, indices)) # Put the groupby query in metadata - meta = map(d -> name(d) => val(d), dimfuncs) + meta = map(d -> DD.name(d) => val(d), dimfuncs) metadata = Dict{Symbol,Any}(:groupby => length(meta) == 1 ? only(meta) : meta) # Return a DimGroupByArray - return DimGroupByArray(views, format(group_dims, views), (), :groupby, metadata) + return DimGroupByArray(A, map(format, group_dims), (), name, metadata) +end + +# An array that holds another secret array that is indexed along with it. +# We use this to put groupings inside lookups so they are handled by `slicedims` +struct HiddenVector{T,A<:AbstractArray{T,1},H<:AbstractArray{<:Any,1}} <: AbstractArray{T,1} + data::A + hidden::H end +Base.getindex(A::HiddenVector, i::Int) = getindex(A.data, i) +Base.view(A::HiddenVector, i::Int) = + HiddenVector(view(A.data, i...), view(A.hidden, i)) +for f in (:view, :getindex) + @eval Base.$f(A::HiddenVector, i::Union{Colon,AbstractArray}) = + HiddenVector(Base.$f(A.data, i), Base.$f(A.hidden, i)) +end +Base.parent(A::HiddenVector) = A.hidden +Base.size(A::HiddenVector) = size(A.data) + +hiddenparent(A::HiddenVector) = A.hidden +hiddenparent(A::Dimension) = hiddenparent(parent(A)) +hiddenparent(A::AbstractArray) = hiddenparent(parent(A)) +hiddenparent(A::Array) = error("HiddenVector not found") + # Define the groups and find all the indices for values that fall in them function _group_indices(dim::Dimension, f::Base.Callable; labels=nothing) orig_lookup = lookup(dim) @@ -363,9 +408,12 @@ function _group_indices(dim::Dimension, f::Base.Callable; labels=nothing) push!(inds, i) end ps = sort!(collect(pairs(indices_dict))) - group_dim = format(rebuild(dim, _maybe_label(labels, first.(ps)))) + lookupvals = _maybe_label(labels, first.(ps)) indices = last.(ps) - return group_dim, indices + # We combine lookup values and indices into on array + lookup_and_indices = HiddenVector(lookupvals, indices) + # And wrap and format them as a dimension + return format(rebuild(dim, lookup_and_indices)) end function _group_indices(dim::Dimension, group_lookup::Lookup; labels=nothing) orig_lookup = lookup(dim) @@ -374,13 +422,15 @@ function _group_indices(dim::Dimension, group_lookup::Lookup; labels=nothing) n = selectindices(group_lookup, Contains(v); err=Lookups._False()) isnothing(n) || push!(indices[n], i) end - group_dim = if isnothing(labels) - rebuild(dim, group_lookup) + lookupvals = if isnothing(labels) + group_lookup else - label_lookup = _maybe_label(labels, group_lookup) - rebuild(dim, label_lookup) + _maybe_label(labels, group_lookup) end - return group_dim, indices + # We combine lookup values and indices into on array + lookup_and_indices = HiddenVector(lookupvals, indices) + # And wrap and format them as a dimension + return format(rebuild(dim, lookup_and_indices)) end function _group_indices(dim::Dimension, bins::AbstractBins; labels=bins.labels) l = lookup(dim) diff --git a/src/opaque.jl b/src/opaque.jl new file mode 100644 index 000000000..87c234f30 --- /dev/null +++ b/src/opaque.jl @@ -0,0 +1,19 @@ +# OpaqueArray is an array that doesn't know what it holds, to simplify dispatch. +# One key property is that `parent(A::OpaqueArray)` returns the `OpaqueArray` `A` +# not the array it holds. +# +# It is often used here to hide dimensional arrays that may be generated lazily, +# To force them to act like simple Arrays without dimensional properties. +# +# OpaqueArray can also hold something that is not an AbstractArray itself. +struct OpaqueArray{T,N,P} <: AbstractArray{T,N} + parent::P +end +OpaqueArray(A::P) where P<:AbstractArray{T,N} where {T,N} = OpaqueArray{T,N,P}(A) +OpaqueArray(st::P) where P<:AbstractDimStack{<:Any,T,N} where {T,N} = OpaqueArray{T,N,P}(st) + +Base.size(A::OpaqueArray) = size(A.parent) +Base.getindex(A::OpaqueArray, I::Union{StandardIndices,Not{<:StandardIndices}}...) = + Base.getindex(A.parent, I...) +Base.setindex!(A::OpaqueArray, I::Union{StandardIndices,Not{<:StandardIndices}}...) = + Base.setindex!(A.parent, I...) \ No newline at end of file diff --git a/src/stack/methods.jl b/src/stack/methods.jl index 7e684ba84..5c9659972 100644 --- a/src/stack/methods.jl +++ b/src/stack/methods.jl @@ -68,8 +68,8 @@ julia> size(slices) (4, 2) julia> map(dims, axes(slices)) -(↓ Z Base.OneTo(4), -→ X Base.OneTo(2)) +(↓ Z, +→ X Categorical{Symbol} [:x1, :x2] ForwardOrdered) julia> first(slices) ┌ 3×5 DimStack ┐ diff --git a/src/tables.jl b/src/tables.jl index d158b2e1c..ef0c560d2 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -1,3 +1,22 @@ +# This lets use switch array of NamedTupleto NamedTuple of Array +struct LayerArray{K,T,N,A} <: AbstractArray{T,N} + data::A +end +function LayerArray{K}(a::A) where {A<:AbstractArray{<:NamedTuple,N}} where {K,N} + T = typeof(a[1][K]) + LayerArray{K,T,N,A}(a) +end +Base.parent(A::LayerArray) = parent(A.data) +Base.size(A::LayerArray) = size(parent(A)) +@propagate_inbounds Base.getindex(A::LayerArray{K}, I::Integer...) where K = + getproperty(getindex(parent(A), I...), K) + +function layerarrays(A::AbstractDimArray{<:NamedTuple{K}}) where K + map(K) do k + rebuild(A; data=LayerArray{k}(A)) + end |> NamedTuple{K} +end + """ AbstractDimTable <: Tables.AbstractColumns @@ -31,7 +50,7 @@ function _colnames(A::AbstractDimArray) n = Symbol(name(A)) == Symbol("") ? :value : Symbol(name(A)) (map(name, dims(A))..., n) end -_colnames(A::AbstractDimVector{T}) where T<:NamedTuple = +_colnames(A::AbstractDimArray{T}) where T<:NamedTuple = (map(name, dims(A))..., _colnames(T)...) _colnames(::Type{<:NamedTuple{Keys}}) where Keys = Keys @@ -59,39 +78,48 @@ To get dimension columns, you can index with `Dimension` (`X()`) or # Keywords - `mergedims`: Combine two or more dimensions into a new dimension. -- `layersfrom`: Treat a dimension of an `AbstractDimArray` as layers of an `AbstractDimStack`. +- `preservedims`: Preserve one or more dimensions from flattening into the table. + `DimArray`s of views with these dimensions will be present in the layer column, + rather than scalar values. +- `layersfrom`: Treat a dimension of an `AbstractDimArray` as layers of an `AbstractDimStack` + by specifying a dimension to use as layers. # Example -```jldoctest +Here we generate a GeoInterface.jl compatible table with `:geometry` +column made of `(X, Y)` points, and data columns from `:band` slices. + +```julia julia> using DimensionalData, Tables -julia> a = DimArray(ones(16, 16, 3), (X, Y, Dim{:band})) -┌ 16×16×3 DimArray{Float64, 3} ┐ -├──────────────────────── dims ┤ - ↓ X, → Y, ↗ band -└──────────────────────────────┘ -[:, :, 1] - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 - -julia> +julia> A = ones(X(4), Y(3), Dim{:band}('a':'d'); name=:data); +julia> DimTable(A; layersfrom=:band, mergedims=(X, Y)=>:geometry) +DimTable with 12 rows, 5 columns, and schema: + :geometry Tuple{Int64, Int64} + :band_a Float64 + :band_b Float64 + :band_c Float64 + :band_d Float64 ``` + +And here bands for each X/Y position are kept as vectors, using `preservedims`. +This may be useful if e.g. bands are color components of spectral images. + +```julia +julia> DimTable(A; preservedims=:band) +DimTable with 12 rows, 3 columns, and schema: + :X … Int64 + :Y Int64 + :data DimVector{Float64, Tuple{Dim{:band, Categorical{Char, StepRange{Char, Int64}, ForwardOrdered, NoMetadata}}}, Tuple{X{NoLookup{UnitRange{Int64}}}, Y{NoLookup{UnitRange{Int64}}}}, SubArray{Float64, 1, Array{Float64, 3}, Tuple{Int64, Int64, Slice{OneTo{Int64}}}, true}, Symbol, NoMetadata} (alias for DimArray{Float64, 1, Tuple{Dim{:band, DimensionalData.Dimensions.Lookups.Categorical{Char, StepRange{Char, Int64}, DimensionalData.Dimensions.Lookups.ForwardOrdered, DimensionalData.Dimensions.Lookups.NoMetadata}}}, Tuple{X{DimensionalData.Dimensions.Lookups.NoLookup{UnitRange{Int64}}}, Y{DimensionalData.Dimensions.Lookups.NoLookup{UnitRange{Int64}}}}, SubArray{Float64, 1, Array{Float64, 3}, Tuple{Int64, Int64, Base.Slice{Base.OneTo{Int64}}}, true}, Symbol, DimensionalData.Dimensions.Lookups.NoMetadata}) + +```julia +julia> DimTable(A) +DimTable with 48 rows, 4 columns, and schema: + :X Int64 + :Y Int64 + :band Char + :data Float64 """ struct DimTable{Mode} <: AbstractDimTable parent::Union{AbstractDimArray,AbstractDimStack} @@ -99,11 +127,18 @@ struct DimTable{Mode} <: AbstractDimTable dimcolumns::Vector{AbstractVector} dimarraycolumns::Vector end - -function DimTable(s::AbstractDimStack; +function DimTable(s::AbstractDimStack; mergedims=nothing, + preservedims=nothing, ) s = isnothing(mergedims) ? s : DD.mergedims(s, mergedims) + s = if isnothing(preservedims) + s + else + maplayers(s) do A + _maybe_presevedims(A, preservedims) + end + end dimcolumns = collect(_dimcolumns(s)) dimarraycolumns = if hassamedims(s) map(vec, layers(s)) @@ -113,16 +148,24 @@ function DimTable(s::AbstractDimStack; keys = collect(_colnames(s)) return DimTable{Columns}(s, keys, dimcolumns, dimarraycolumns) end -function DimTable(As::Vararg{AbstractDimArray}; - layernames=nothing, - mergedims=nothing, +function DimTable(As::AbstractVector{<:AbstractDimArray}; + layernames=nothing, + mergedims=nothing, + preservedims=nothing, ) # Check that dims are compatible - comparedims(As...) + comparedims(As) # Construct Layer Names layernames = isnothing(layernames) ? uniquekeys(As) : layernames # Construct dimension and array columns with DimExtensionArray - As = isnothing(mergedims) ? As : map(x -> DD.mergedims(x, mergedims), As) + As = isnothing(mergedims) ? As : map(x -> DimensionalData.mergedims(x, mergedims), As) + As = if isnothing(preservedims) + As + else + map(As) do A + _maybe_presevedims(A, preservedims) + end + end dims_ = dims(first(As)) dimcolumns = collect(_dimcolumns(dims_)) dimnames = collect(map(name, dims_)) @@ -132,9 +175,10 @@ function DimTable(As::Vararg{AbstractDimArray}; # Return DimTable return DimTable{Columns}(first(As), colnames, dimcolumns, dimarraycolumns) end -function DimTable(A::AbstractDimArray; - layersfrom=nothing, - mergedims=nothing, +function DimTable(A::AbstractDimArray; + layersfrom=nothing, + mergedims=nothing, + preservedims=nothing, ) if !isnothing(layersfrom) && any(hasdim(A, layersfrom)) d = dims(A, layersfrom) @@ -145,21 +189,36 @@ function DimTable(A::AbstractDimArray; else Symbol.(("$(name(d))_$i" for i in 1:nlayers)) end - return DimTable(layers...; layernames, mergedims) + return DimTable(layers; layernames, mergedims, preservedims) else - A = isnothing(mergedims) ? A : DD.mergedims(A, mergedims) - dimcolumns = collect(_dimcolumns(A)) - colnames = collect(_colnames(A)) - if (ndims(A) == 1) && (eltype(A) <: NamedTuple) - dimarrayrows = parent(A) - return DimTable{Rows}(A, colnames, dimcolumns, dimarrayrows) + A1 = isnothing(mergedims) ? A : DD.mergedims(A, mergedims) + if eltype(A1) <: NamedTuple + if isnothing(preservedims) + dimcolumns = collect(_dimcolumns(A1)) + colnames = collect(_colnames(A1)) + dimarrayrows = vec(parent(A1)) + return DimTable{Rows}(A1, colnames, dimcolumns, dimarrayrows) + else + las = layerarrays(A1) + layernames = collect(keys(las)) + return DimTable(collect(las); layernames, mergedims, preservedims) + end else - dimarraycolumns = [vec(parent(A))] - return DimTable{Columns}(A, colnames, dimcolumns, dimarraycolumns) + A2 = _maybe_presevedims(A1, preservedims) + dimcolumns = collect(_dimcolumns(A2)) + colnames = collect(_colnames(A2)) + dimarraycolumns = [vec(parent(A2))] + return DimTable{Columns}(A2, colnames, dimcolumns, dimarraycolumns) end end end +_maybe_presevedims(A, preservedims::Nothing) = A +function _maybe_presevedims(A, preservedims) + S = DimSlices(A; dims=otherdims(A, preservedims)) + rebuild(A; data=OpaqueArray(S), dims=dims(S)) +end + _dimcolumns(x) = map(d -> _dimcolumn(x, d), dims(x)) function _dimcolumn(x, d::Dimension) lookupvals = parent(lookup(d)) diff --git a/test/dimindices.jl b/test/dimindices.jl index ed7a59bc3..7a7311feb 100644 --- a/test/dimindices.jl +++ b/test/dimindices.jl @@ -38,7 +38,7 @@ A = zeros(X(4.0:7.0), Y(10.0:12.0)) @test @inferred size(A1[di[2:4, 1:2], Ti=1]) == (3, 2) @test @inferred A1[di] isa DimArray{Float64,3} @test @inferred A1[X=1][di] isa DimArray{Float64,2} - @test @inferred A1[X=1, Y=1][di] isa DimArray{Float64,1} + @test @inferred A1[X=1, Y=1] isa DimArray{Float64,1} # Indexing with no matching dims still returns a DimArray @test @inferred view(A1, X=1, Y=1, Ti=1)[di] == fill(0.0) @@ -207,11 +207,16 @@ end @testset "DimSlices" begin A = DimArray(((1:4) * (1:3)'), (X(4.0:7.0), Y(10.0:12.0)); name=:foo) - axisdims = map(dims(A, (X,))) do d - rebuild(d, axes(lookup(d), 1)) - end - ds = DimensionalData.DimSlices(A; dims=axisdims) + ds = DimensionalData.DimSlices(A; dims=X) + @test ds == ds[X=:] + # Works just like Slices + @test sum(ds) == sum(eachslice(A; dims=X)) @test ds == ds[X=:] + @test ds[X=At(7.0)] == [4, 8, 12] # Works just like Slices @test sum(ds) == sum(eachslice(A; dims=X)) + @test axes(ds) == axes(eachslice(A; dims=X)) + ds0 = DimensionalData.DimSlices(A; dims=()); + @test sum(ds0) == sum(eachslice(parent(A); dims=())) + @test axes(ds0) == axes(eachslice(parent(A); dims=())) end diff --git a/test/groupby.jl b/test/groupby.jl index cd19274bd..4e6c53f8c 100644 --- a/test/groupby.jl +++ b/test/groupby.jl @@ -39,7 +39,8 @@ end end |> permutedims gb_sum = sum.(groupby(A, Ti=>month, X => >(1.5))) @test dims(gb_sum, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())) - @test typeof(dims(gb_sum, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))) + @test typeof(dims(gb_sum, X)) == + X{Sampled{Bool, DimensionalData.HiddenVector{Bool, BitVector, Vector{Vector{Int64}}}, ForwardOrdered, Irregular{Tuple{Nothing, Nothing}}, Points, NoMetadata}} @test gb_sum == manualsums combined_sum = combine(sum, groupby(A, Ti=>month, X => >(1.5))) @test collect(combined_sum) == manualsums @@ -51,7 +52,6 @@ end end |> permutedims gb_sum_st = sum.(groupby(st, Ti=>month, X => >(1.5))) @test dims(gb_sum_st, Ti) == Ti(Sampled([1:12...], ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata())) - @test typeof(dims(gb_sum_st, X)) == typeof(X(Sampled(BitVector([false, true]), ForwardOrdered(), Irregular((nothing, nothing)), Points(), NoMetadata()))) @test gb_sum_st == manualsums_st combined_sum_st = combine(sum, groupby(st, Ti=>month, X => >(1.5))) @test collect(combined_sum_st) == manualsums_st @@ -87,7 +87,7 @@ end B = rand(X(xs; sampling=Intervals(Start())), Ti(dates; sampling=Intervals(Start()))) gb = groupby(A, B) @test size(gb) === size(B) === size(mean.(gb)) - @test dims(gb) === dims(B) === dims(mean.(gb)) + @test parent(lookup(gb, X)) == parent(lookup(B, X)) == parent(lookup(mean.(gb), X)) manualmeans = mapreduce(hcat, intervals(dates)) do d map(intervals(xs)) do x mean(A[X=x, Ti=d]) diff --git a/test/indexing.jl b/test/indexing.jl index b2b9d147b..cf18a682a 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -208,6 +208,7 @@ end @test b == da[1:2:end] == da[Begin:2:End] v = @inferred da[1, :] + @test v isa DimVector @test @inferred v[1:2] isa DimArray @test @inferred v[rand(Bool, length(v))] isa DimArray b = v[[!iseven(i) for i in 1:length(v)]] @@ -500,6 +501,7 @@ end @testset "mixed dimensions" begin a = [[1 2 3; 4 5 6];;; [11 12 13; 14 15 16];;;] da = DimArray(a, (X(143.0:2:145.0), Y(-38.0:-36.0), Ti(100:100:200)); name=:test) + da[1, End(), Begin(), 1] da[Ti=1, DimIndices(da[Ti=1])] da[DimIndices(da[Ti=1]), Ti(2)] da[DimIndices(da[Ti=1])[:], Ti(2)] diff --git a/test/runtests.jl b/test/runtests.jl index 25330be32..18962daf4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,7 +19,6 @@ end @time @safetestset "merged" begin include("merged.jl") end @time @safetestset "DimUnitRange" begin include("dimunitrange.jl") end @time @safetestset "format" begin include("format.jl") end - @time @safetestset "array" begin include("array.jl") end @time @safetestset "stack" begin include("stack.jl") end @time @safetestset "indexing" begin include("indexing.jl") end diff --git a/test/stack.jl b/test/stack.jl index ee6d2eac0..1d7d36242 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -3,7 +3,7 @@ using DimensionalData, Test, LinearAlgebra, Statistics, ConstructionBase, Random using DimensionalData: data using DimensionalData: Sampled, Categorical, AutoLookup, NoLookup, Transformed, Regular, Irregular, Points, Intervals, Start, Center, End, - Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, layers, basedims, layerdims + Metadata, NoMetadata, ForwardOrdered, ReverseOrdered, Unordered, basedims, layerdims, layers, metadata A = [1.0 2.0 3.0; 4.0 5.0 6.0] diff --git a/test/tables.jl b/test/tables.jl index f5ea708db..26466fb4f 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -1,9 +1,10 @@ +using DataFrames +using Dates using DimensionalData -using Test -using Tables using IteratorInterfaceExtensions using TableTraits -using DataFrames +using Tables +using Test using DimensionalData.Lookups, DimensionalData.Dimensions using DimensionalData: DimTable, DimExtensionArray @@ -149,21 +150,67 @@ end @testset "DimTable mergelayers" begin a = DimStack([DimArray(rand(32, 32, 3), (X,Y,Ti)) for _ in 1:3]) - b = DimArray(rand(32, 32, 3), (X,Y,Dim{:band})) - t1 = DimTable(a, mergedims=(:X,:Y)=>:geometry) - t2 = DimTable(a, mergedims=(:X,:Y,:Z)=>:geometry) # Merge missing dimension - t3 = DimTable(a, mergedims=(X,:Y,Ti)=>:dimensions) # Mix symbols and dimensions - t4 = DimTable(b, mergedims=(:X,:Y)=>:geometry) # Test DimArray + b = DimArray(rand(32, 32, 3), (X, Y, Dim{:band})) + t1 = DimTable(a, mergedims=(:X, :Y) => :geometry) + t2 = DimTable(a, mergedims=(:X, :Y, :Z) => :geometry) # Merge missing dimension + t3 = DimTable(a, mergedims=(X, :Y, Ti) => :dimensions) # Mix symbols and dimensions + t4 = DimTable(b, mergedims=(:X, :Y) => :geometry) # Test DimArray @test Tables.columnnames(t1) == (:Ti, :geometry, :layer1, :layer2, :layer3) @test Tables.columnnames(t2) == (:Ti, :geometry, :layer1, :layer2, :layer3) @test Tables.columnnames(t3) == (:dimensions, :layer1, :layer2, :layer3) @test Tables.columnnames(t4) == (:band, :geometry, :value) end +@testset "DimTable preservedims" begin + x, y, t = X(1.0:32.0), Y(1.0:10.0), Ti(DateTime.([2001, 2002, 2003])) + st = DimStack([rand(x, y, t; name) for name in [:a, :b, :c]]) + A = rand(x, y, Dim{:band}(1:3); name=:vals) + t1 = DimTable(st, preservedims=(X, Y)) + a3 = Tables.getcolumn(t1, :a)[3] + @test Tables.columnnames(t1) == propertynames(t1) == (:Ti, :a, :b, :c) + @test a3 == st.a[Ti=3] + @test dims(a3) == dims(st, (X, Y)) + t2 = DimTable(A; preservedims=:band) + val10 = Tables.getcolumn(t2, :vals)[10] + @test Tables.columnnames(t2) == propertynames(t2) == (:X, :Y, :vals) + @test val10 == A[X(10), Y(1)] + @test dims(val10) == dims(A, (:band,)) + @testset "preservedims with mergedims" begin + t3 = DimTable(A; mergedims=(X, Y) => :geometry, preservedims=:band) + @test only(dims(t3)) isa Dim{:geometry} + @test Tables.getcolumn(t2, :vals)[1] isa DimArray + end +end + @testset "DimTable NamedTuple" begin - da = DimArray([(; a=1.0f0i, b=2.0i) for i in 1:10], X) - t = DimTable(da) - s = Tables.schema(t) - @test s.names == (:X, :a, :b) - @test s.types == (Int, Float32, Float64) + @testset "Vector of NamedTuple" begin + da = DimArray([(; a=1.0f0i, b=2.0i) for i in 1:10], X) + t = DimTable(da) + s = Tables.schema(t) + @test s.names == (:X, :a, :b) + @test s.types == (Int, Float32, Float64) + @test all(t.a .=== 1.0f0:10.0f0) + @test all(t.b .=== 2.0:2.0:20.0) + end + + @testset "Matrix of NamedTuple" begin + da = [(; a=1.0f0x*y, b=2.0x*y) for x in X(1:10), y in Y(1:5)] + t = DimTable(da); + s = Tables.schema(t) + @test s.names == (:X, :Y, :a, :b) + @test s.types == (Int, Int, Float32, Float64) + @test all(t.a .=== reduce(vcat, [1.0f0y:y:10.0f0y for y in 1:5])) + @test all(t.b .=== reduce(vcat, [2.0y:2.0y:20.0y for y in 1:5])) + end + @testset "Matrix of NamedTuple with preservedims" begin + da = [(; a=1.0f0x*y, b=2.0x*y) for x in X(1:10), y in Y(1:5)] + t = DimTable(da; preservedims=X); + s = Tables.schema(t) + @test s.names == (:Y, :a, :b) + @test s.types[1] <: Int + @test s.types[2] <: DimVector + @test s.types[2] <: DimVector + @test all(t.a .== [[1.0f0x*y for x in X(1:10)] for y in Y(1:5)]) + @test all(t.b .== [[2.0x*y for x in X(1:10)] for y in Y(1:5)]) + end end