diff --git a/src/tables.jl b/src/tables.jl index 6fa62e1a9..ba3d46ca0 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -24,11 +24,12 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s)) Tables.getcolumn(DimTable(x), key) @inline Tables.getcolumn(x::DimTableSources, ::Type{T}, i::Int, key::Symbol) where T = Tables.getcolumn(DimTable(x), T, i, key) -@inline Tables.getcolumn(t::DimTableSources, dim::DimOrDimType) = - Tables.getcolumn(t, dimnum(t, dim)) +@inline Tables.getcolumn(x::DimTableSources, key::DimOrDimType) = + Tables.getcolumn(DimTable(x), key) -function _colnames(s::AbstractDimStack) - dimkeys = map(name, dims(s)) +_colnames(s::AbstractDimStack) = _colnames(s, dims(s)) +function _colnames(s::AbstractDimStack, alldims::Tuple) + dimkeys = map(name, alldims) # The data is always the last column/s (dimkeys..., keys(s)...) end @@ -38,9 +39,9 @@ end """ DimTable <: AbstractDimTable - DimTable(s::AbstractDimStack; mergedims=nothing) - DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) - DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing) + DimTable(s::AbstractDimStack; mergedims=nothing[, refdims]) + DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing[, refdims]) + DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing[, refdims]) Construct a Tables.jl/TableTraits.jl compatible object out of an `AbstractDimArray` or `AbstractDimStack`. @@ -58,6 +59,7 @@ To get dimension columns, you can index with `Dimension` (`X()`) or # Keywords - `mergedims`: Combine two or more dimensions into a new dimension. - `layersfrom`: Treat a dimension of an `AbstractDimArray` as layers of an `AbstractDimStack`. +- `refdims`: Additional reference dimensions to add to the table, defaults to `()`. # Example @@ -86,29 +88,47 @@ julia> a = DimArray(ones(16, 16, 3), (X, Y, Dim{:band})) 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0 -julia> - +julia> DimTable(a) +DimTable with 768 rows, 4 columns, and schema: + :X Int64 + :Y Int64 + :band Int64 + :value Float64 + +julia> DimTable(a[X(3), Y(2)]; refdims=(X(3:3), Y(2:2))) # slice X and Y and add the reference dimensions +DimTable with 3 rows, 4 columns, and schema: + :band Int64 + :X Int64 + :Y Int64 + :value Float64 ``` """ struct DimTable <: AbstractDimTable parent::Union{AbstractDimArray,AbstractDimStack} + dims::Tuple{Vararg{Dimension}} colnames::Vector{Symbol} dimcolumns::Vector{AbstractVector} dimarraycolumns::Vector{AbstractVector} end -function DimTable(s::AbstractDimStack; mergedims=nothing) +function DimTable(s::AbstractDimStack; mergedims=nothing, refdims=()) s = isnothing(mergedims) ? s : DD.mergedims(s, mergedims) - dimcolumns = collect(_dimcolumns(s)) - dimarraycolumns = if hassamedims(s) + alldims = combinedims(dims(s), refdims) + dimcolumns = collect(_dimcolumns(alldims)) + dimarraycolumns = if hassamedims(s) && isempty(refdims) map(vec, layers(s)) else - map(A -> vec(DimExtensionArray(A, dims(s))), layers(s)) + map(A -> vec(DimExtensionArray(A, alldims)), layers(s)) end |> collect - keys = collect(_colnames(s)) - return DimTable(s, keys, dimcolumns, dimarraycolumns) + keys = collect(_colnames(s, alldims)) + return DimTable(s, alldims, keys, dimcolumns, dimarraycolumns) end -function DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing) +function DimTable( + xs::Vararg{AbstractDimArray}; + layernames=nothing, + mergedims=nothing, + refdims=(), +) # Check that dims are compatible comparedims(xs...) @@ -117,16 +137,21 @@ function DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=no # Construct dimension and array columns with DimExtensionArray xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs) - dims_ = dims(first(xs)) - dimcolumns = collect(_dimcolumns(dims_)) - dimnames = collect(map(name, dims_)) + alldims = combinedims(dims(first(xs)), refdims) + dimcolumns = collect(_dimcolumns(alldims)) + dimnames = collect(map(name, alldims)) dimarraycolumns = collect(map(vec ∘ parent, xs)) colnames = vcat(dimnames, layernames) # Return DimTable - return DimTable(first(xs), colnames, dimcolumns, dimarraycolumns) + return DimTable(first(xs), alldims, colnames, dimcolumns, dimarraycolumns) end -function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) +function DimTable( + x::AbstractDimArray; + layersfrom=nothing, + mergedims=nothing, + refdims=(), +) if !isnothing(layersfrom) && any(hasdim(x, layersfrom)) d = dims(x, layersfrom) nlayers = size(x, d) @@ -136,10 +161,10 @@ function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing) else Symbol.(("$(name(d))_$i" for i in 1:nlayers)) end - return DimTable(layers..., layernames=layernames, mergedims=mergedims) + return DimTable(layers...; layernames=layernames, mergedims=mergedims, refdims=refdims) else s = name(x) == NoName() ? DimStack((;value=x)) : DimStack(x) - return DimTable(s, mergedims=mergedims) + return DimTable(s; mergedims=mergedims, refdims=refdims) end end @@ -168,20 +193,22 @@ for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span, end +_dims(t::DimTable) = getfield(t, :dims) + Tables.istable(::DimTable) = true Tables.columnaccess(::Type{<:DimTable}) = true Tables.columns(t::DimTable) = t Tables.columnnames(c::DimTable) = colnames(c) -function Tables.schema(t::DimTable) +function Tables.schema(t::DimTable) types = vcat([map(eltype, dimcolumns(t))...], [map(eltype, dimarraycolumns(t))...]) Tables.Schema(colnames(t), types) end @inline function Tables.getcolumn(t::DimTable, i::Int) nkeys = length(colnames(t)) - if i > length(dims(t)) - dimarraycolumns(t)[i - length(dims(t))] + if i > length(_dims(t)) + dimarraycolumns(t)[i - length(_dims(t))] elseif i > 0 && i < nkeys dimcolumns(t)[i] else @@ -190,7 +217,7 @@ end end @inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType) - dimcolumns(t)[dimnum(t, dim)] + dimcolumns(t)[dimnum(_dims(t), dim)] end @inline function Tables.getcolumn(t::DimTable, key::Symbol) diff --git a/test/tables.jl b/test/tables.jl index b5bd416ea..b0e989ea9 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -5,58 +5,96 @@ using DimensionalData: DimTable, DimExtensionArray x = X([:a, :b, :c]) y = Y([10.0, 20.0]) +z = Z([3, 8]) d = Dim{:test}(1.0:1.0:3.0) dimz = x, y, d -da = DimArray(ones(3, 2, 3), dimz; name=:data) da2 = DimArray(fill(2, (3, 2, 3)), dimz; name=:data2) @testset "DimArray Tables interface" begin - ds = DimStack(da) - t = Tables.columns(ds) - @test t isa DimTable - @test dims(t) === dims(da) - @test parent(t) === ds - - @test Tables.columns(t) === t - @test length(t[:X]) == length(t[:Y]) == length(t[:test]) == 18 - - @test Tables.istable(typeof(t)) == Tables.istable(t) == - Tables.istable(typeof(da)) == Tables.istable(da) == - Tables.istable(typeof(ds)) == Tables.istable(ds) == true - @test Tables.columnaccess(t) == Tables.columnaccess(da) == Tables.columnaccess(ds) == true - @test Tables.rowaccess(t) == Tables.rowaccess(ds) == Tables.rowaccess(ds) == false - @test Tables.columnnames(t) == Tables.columnnames(da) == Tables.columnnames(ds) == (:X, :Y, :test, :data) - - sa = Tables.schema(da) - sds = Tables.schema(ds) - st = Tables.schema(t) - @test sa.names == sds.names == st.names == (:X, :Y, :test, :data) - @test sa.types == sds.types == st.types == (Symbol, Float64, Float64, Float64) - - @test Tables.getcolumn(t, 1) == Tables.getcolumn(t, :X) == Tables.getcolumn(t, X) == - Tables.getcolumn(ds, 1) == Tables.getcolumn(ds, :X) == Tables.getcolumn(ds, X) == - Tables.getcolumn(da, 1) == Tables.getcolumn(da, :X) == Tables.getcolumn(da, X) == - Tables.getcolumn(da, 1)[:] == repeat([:a, :b, :c], 6) - @test Tables.getcolumn(t, 2) == Tables.getcolumn(t, :Y) == - Tables.getcolumn(da, 2) == Tables.getcolumn(da, :Y) == - Tables.getcolumn(ds, 2) == Tables.getcolumn(ds, :Y) == - Tables.getcolumn(ds, 2)[:] == repeat([10.0, 10.0, 10.0, 20.0, 20.0, 20.0], 3) - @test Tables.getcolumn(t, 3) == Tables.getcolumn(t, :test) == - Tables.getcolumn(da, 3) == Tables.getcolumn(da, :test) == - Tables.getcolumn(ds, 3) == Tables.getcolumn(ds, :test) == - Tables.getcolumn(ds, 3)[:] == vcat(repeat([1.0], 6), repeat([2.0], 6), repeat([3.0], 6)) - @test Tables.getcolumn(t, 4) == Tables.getcolumn(t, :data) == - Tables.getcolumn(da, 4) == Tables.getcolumn(da, :data) == - Tables.getcolumn(ds, 4) == Tables.getcolumn(ds, :data) == - Tables.getcolumn(ds, 4)[:] == ones(3 * 2 * 3) - @test Tables.getcolumn(t, Float64, 4, :data) == ones(3 * 2 * 3) - @test Tables.getcolumn(t, Float64, 2, :Y) == Tables.getcolumn(da, Float64, 2, :Y) == - Tables.getcolumn(ds, Float64, 2, :Y) == - Tables.getcolumn(ds, Float64, 2, :Y)[:] == repeat([10.0, 10.0, 10.0, 20.0, 20.0, 20.0], 3) - @test_throws ArgumentError Tables.getcolumn(t, :NotAColumn) - @test_throws BoundsError Tables.getcolumn(t, 5) + @testset for dim_ref in ((), (z,)) + ref_names = name(dim_ref) + ref_num = length(dim_ref) + ref_size = prod(length, dim_ref; init=1) + da = DimArray(ones(3, 2, 3), dimz; name=:data, refdims=dim_ref) + + nrows = prod(size(da)) * ref_size + col_names = (:X, :Y, :test, ref_names..., :data) + col_names_no_ref = (:X, :Y, :test, :data) + col_eltypes = (Symbol, Float64, Float64, map(eltype, dim_ref)..., Float64) + col_eltypes_no_ref = (Symbol, Float64, Float64, Float64) + dim_vals = vec(collect(Iterators.product(dimz..., dim_ref...))) + col_vals = [getindex.(dim_vals, i) for i in eachindex(first(dim_vals))] + push!(col_vals, ones(nrows)) + + ds = DimStack(da) + t = DimTable(ds; refdims=dim_ref) + @test t isa DimTable + @test dims(t) === dims(da) + @test parent(t) === ds + t2 = Tables.columns(ds) + @test t2 isa DimTable + if isempty(dim_ref) + @test Tables.columnnames(t2) == Tables.columnnames(t) + end + + @test Tables.columns(t) === t + @test length(t[:X]) == length(t[:Y]) == length(t[:test]) == nrows + + @test Tables.istable(typeof(t)) == Tables.istable(t) == + Tables.istable(typeof(da)) == Tables.istable(da) == + Tables.istable(typeof(ds)) == Tables.istable(ds) == true + @test Tables.columnaccess(t) == Tables.columnaccess(da) == + Tables.columnaccess(ds) == true + @test Tables.rowaccess(t) == Tables.rowaccess(ds) == Tables.rowaccess(ds) == false + @test Tables.columnnames(t) == col_names + + alldims = combinedims(dims(ds), dim_ref) + col_dims = (alldims..., fill(nothing, length(col_names) - length(alldims))...) + @testset for (i, (col, dim, col_eltype)) in enumerate( + zip(col_names, col_dims, col_eltypes), + ) + col_val = Tables.getcolumn(t, i) + @test col_val == Tables.getcolumn(t, col) == col_vals[i] + + if !isnothing(dim) + @test col_val == Tables.getcolumn(t, dim) + end + end + @test_throws ArgumentError Tables.getcolumn(t, :NotAColumn) + @test_throws BoundsError Tables.getcolumn(t, length(col_names) + 1) + + sa = Tables.schema(da) + sds = Tables.schema(ds) + st = Tables.schema(t) + + @testset "consistency of DimStack and DimArray Tables interfaces" begin + @test Tables.columnnames(da) == Tables.columnnames(ds) == sa.names == sds.names == col_names_no_ref + @test sa.types == sds.types == col_eltypes_no_ref + @test Tables.columntable(da) == Tables.columntable(ds) + end + + isempty(dim_ref) || continue + @testset "DimTable interface with no refdims consistent with DimStack/DimArray Tables interfaces" begin + @test sa.names == col_names + @test sa.types == col_eltypes + @test Tables.columntable(da) == Tables.columntable(t) + @testset for (i, (col, dim, col_eltype)) in enumerate( + zip(col_names, col_dims, col_eltypes), + ) + @test col_vals[i] == Tables.getcolumn(da, col) == Tables.getcolumn(ds, col) == + Tables.getcolumn(da, i) == Tables.getcolumn(ds, i) + + if !isnothing(dim) + @test col_vals[i] == Tables.getcolumn(da, dim) == + Tables.getcolumn(ds, dim) == Tables.getcolumn(da, typeof(dim)) == + Tables.getcolumn(ds, typeof(dim)) + end + end + end + end end +da = DimArray(ones(3, 2, 3), dimz; name=:data) @testset "DimArray TableTraits interface" begin ds = DimStack(da) t = DimTable(ds)