Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 54 additions & 27 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ Tables.schema(s::AbstractDimStack) = Tables.schema(DimTable(s))
Tables.getcolumn(DimTable(x), key)
@inline Tables.getcolumn(x::DimTableSources, ::Type{T}, i::Int, key::Symbol) where T =
Tables.getcolumn(DimTable(x), T, i, key)
@inline Tables.getcolumn(t::DimTableSources, dim::DimOrDimType) =
Tables.getcolumn(t, dimnum(t, dim))
@inline Tables.getcolumn(x::DimTableSources, key::DimOrDimType) =
Tables.getcolumn(DimTable(x), key)

function _colnames(s::AbstractDimStack)
dimkeys = map(name, dims(s))
_colnames(s::AbstractDimStack) = _colnames(s, dims(s))
function _colnames(s::AbstractDimStack, alldims::Tuple)
    # Column order: the name of every dimension in `alldims` first,
    # then the keys of `s`, so the data columns always come last.
    return (map(name, alldims)..., keys(s)...)
end
Expand All @@ -38,9 +39,9 @@ end
"""
DimTable <: AbstractDimTable

DimTable(s::AbstractDimStack; mergedims=nothing)
DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing)
DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing)
DimTable(s::AbstractDimStack; mergedims=nothing[, refdims])
DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing[, refdims])
DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing[, refdims])

Construct a Tables.jl/TableTraits.jl compatible object out of an `AbstractDimArray` or `AbstractDimStack`.

Expand All @@ -58,6 +59,7 @@ To get dimension columns, you can index with `Dimension` (`X()`) or
# Keywords
- `mergedims`: Combine two or more dimensions into a new dimension.
- `layersfrom`: Treat a dimension of an `AbstractDimArray` as layers of an `AbstractDimStack`.
- `refdims`: Additional reference dimensions to add to the table, defaults to `()`.

# Example

Expand Down Expand Up @@ -86,29 +88,47 @@ julia> a = DimArray(ones(16, 16, 3), (X, Y, Dim{:band}))
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0
1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 1.0 1.0

julia>

julia> DimTable(a)
DimTable with 768 rows, 4 columns, and schema:
:X Int64
:Y Int64
:band Int64
:value Float64

julia> DimTable(a[X(3), Y(2)]; refdims=(X(3:3), Y(2:2))) # slice X and Y and add the reference dimensions
DimTable with 3 rows, 4 columns, and schema:
:band Int64
:X Int64
:Y Int64
:value Float64
```
"""
struct DimTable <: AbstractDimTable
# The wrapped array or stack; the constructors pass the stack `s` or `first(xs)` here.
parent::Union{AbstractDimArray,AbstractDimStack}
# Dimensions that have a column in the table. The constructors pass
# `combinedims(dims(...), refdims)`, so this can hold more dimensions than the parent has.
dims::Tuple{Vararg{Dimension}}
# Column names: dimension names first, followed by the layer names.
colnames::Vector{Symbol}
# Columns built by `_dimcolumns` from the dimensions stored in `dims`.
dimcolumns::Vector{AbstractVector}
# One vectorised data column per layer.
dimarraycolumns::Vector{AbstractVector}
end

function DimTable(s::AbstractDimStack; mergedims=nothing)
function DimTable(s::AbstractDimStack; mergedims=nothing, refdims=())
s = isnothing(mergedims) ? s : DD.mergedims(s, mergedims)
dimcolumns = collect(_dimcolumns(s))
dimarraycolumns = if hassamedims(s)
alldims = combinedims(dims(s), refdims)
dimcolumns = collect(_dimcolumns(alldims))
dimarraycolumns = if hassamedims(s) && isempty(refdims)
map(vec, layers(s))
else
map(A -> vec(DimExtensionArray(A, dims(s))), layers(s))
map(A -> vec(DimExtensionArray(A, alldims)), layers(s))
end |> collect
keys = collect(_colnames(s))
return DimTable(s, keys, dimcolumns, dimarraycolumns)
keys = collect(_colnames(s, alldims))
return DimTable(s, alldims, keys, dimcolumns, dimarraycolumns)
end
function DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=nothing)
function DimTable(
xs::Vararg{AbstractDimArray};
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear from the documentation what assumptions this method makes about xs, so it's possible there's a mistake in this method.

layernames=nothing,
mergedims=nothing,
refdims=(),
)
# Check that dims are compatible
comparedims(xs...)

Expand All @@ -117,16 +137,21 @@ function DimTable(xs::Vararg{AbstractDimArray}; layernames=nothing, mergedims=no

# Construct dimension and array columns with DimExtensionArray
xs = isnothing(mergedims) ? xs : map(x -> DimensionalData.mergedims(x, mergedims), xs)
dims_ = dims(first(xs))
dimcolumns = collect(_dimcolumns(dims_))
dimnames = collect(map(name, dims_))
alldims = combinedims(dims(first(xs)), refdims)
dimcolumns = collect(_dimcolumns(alldims))
dimnames = collect(map(name, alldims))
dimarraycolumns = collect(map(vec ∘ parent, xs))
colnames = vcat(dimnames, layernames)

# Return DimTable
return DimTable(first(xs), colnames, dimcolumns, dimarraycolumns)
return DimTable(first(xs), alldims, colnames, dimcolumns, dimarraycolumns)
end
function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing)
function DimTable(
x::AbstractDimArray;
layersfrom=nothing,
mergedims=nothing,
refdims=(),
)
if !isnothing(layersfrom) && any(hasdim(x, layersfrom))
d = dims(x, layersfrom)
nlayers = size(x, d)
Expand All @@ -136,10 +161,10 @@ function DimTable(x::AbstractDimArray; layersfrom=nothing, mergedims=nothing)
else
Symbol.(("$(name(d))_$i" for i in 1:nlayers))
end
return DimTable(layers..., layernames=layernames, mergedims=mergedims)
return DimTable(layers...; layernames=layernames, mergedims=mergedims, refdims=refdims)
else
s = name(x) == NoName() ? DimStack((;value=x)) : DimStack(x)
return DimTable(s, mergedims=mergedims)
return DimTable(s; mergedims=mergedims, refdims=refdims)
end
end

Expand Down Expand Up @@ -168,20 +193,22 @@ for func in (:dims, :val, :index, :lookup, :metadata, :order, :sampling, :span,

end

# Return the dimensions stored in the table's own `dims` field.
function _dims(t::DimTable)
    return getfield(t, :dims)
end
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it at all weird that dims(t::DimTable) will return fewer dims than the actual dims included in the table (because it just forwards to the parent)?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't even know it did that. We can fix these things and merge to breaking instead?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It didn't do that until this PR, since previously the table's dims were the parent dims, but now additional dims may be included.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, you mean the refdims? Yeah, how do we keep that separate?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2 ways I can think of:

  • Keep dims(::DimTable) forwarding to the parent and add the method refdims(t::DimTable) = otherdims(dims(t), _dims(t)). Maybe not the right way to go if the parent is a slice of a DimMatrix where both dimensions have the same dim, but that kind of thing in general may not be well supported.
  • Remove the dims field and add a refdims and refdimarrays field. Then refdims(t::DimTable) = getfield(t, :refdims). I started implementing this version originally and abandoned it because it makes the column indexing more complicated, but I could bring it back.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we not just do refdims(dt::DimTable) = refdims(parent(dt))? Maybe I'm missing something.

Copy link
Collaborator Author

@sethaxen sethaxen Oct 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it depends on what dims(::DimTable) and refdims(::DimTable) should mean. With this PR, a user can provide arbitrary refdims to the DimTable, so they may not even be in the parent. But should refdims(::DimTable) return those user-provided refdims (currently only stored in (::DimTable).dims) or return those of the parent? Should dims(::DimTable) return just the dims of the parent, or should it return all dimensions corresponding to columns (which, with this PR, also include user-provided refdims)?


# Tables.jl interface: a `DimTable` is a table with column access,
# and it acts as its own columns object.
function Tables.istable(::DimTable)
    return true
end
function Tables.columnaccess(::Type{<:DimTable})
    return true
end
function Tables.columns(table::DimTable)
    return table
end
function Tables.columnnames(table::DimTable)
    return colnames(table)
end

function Tables.schema(t::DimTable)
function Tables.schema(t::DimTable)
types = vcat([map(eltype, dimcolumns(t))...], [map(eltype, dimarraycolumns(t))...])
Tables.Schema(colnames(t), types)
end

@inline function Tables.getcolumn(t::DimTable, i::Int)
nkeys = length(colnames(t))
if i > length(dims(t))
dimarraycolumns(t)[i - length(dims(t))]
if i > length(_dims(t))
dimarraycolumns(t)[i - length(_dims(t))]
elseif i > 0 && i < nkeys
dimcolumns(t)[i]
else
Expand All @@ -190,7 +217,7 @@ end
end

@inline function Tables.getcolumn(t::DimTable, dim::DimOrDimType)
dimcolumns(t)[dimnum(t, dim)]
dimcolumns(t)[dimnum(_dims(t), dim)]
end

@inline function Tables.getcolumn(t::DimTable, key::Symbol)
Expand Down
128 changes: 83 additions & 45 deletions test/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,58 +5,96 @@ using DimensionalData: DimTable, DimExtensionArray

x = X([:a, :b, :c])
y = Y([10.0, 20.0])
z = Z([3, 8])
d = Dim{:test}(1.0:1.0:3.0)
dimz = x, y, d
da = DimArray(ones(3, 2, 3), dimz; name=:data)
da2 = DimArray(fill(2, (3, 2, 3)), dimz; name=:data2)

@testset "DimArray Tables interface" begin
ds = DimStack(da)
t = Tables.columns(ds)
@test t isa DimTable
@test dims(t) === dims(da)
@test parent(t) === ds

@test Tables.columns(t) === t
@test length(t[:X]) == length(t[:Y]) == length(t[:test]) == 18

@test Tables.istable(typeof(t)) == Tables.istable(t) ==
Tables.istable(typeof(da)) == Tables.istable(da) ==
Tables.istable(typeof(ds)) == Tables.istable(ds) == true
@test Tables.columnaccess(t) == Tables.columnaccess(da) == Tables.columnaccess(ds) == true
@test Tables.rowaccess(t) == Tables.rowaccess(ds) == Tables.rowaccess(ds) == false
@test Tables.columnnames(t) == Tables.columnnames(da) == Tables.columnnames(ds) == (:X, :Y, :test, :data)

sa = Tables.schema(da)
sds = Tables.schema(ds)
st = Tables.schema(t)
@test sa.names == sds.names == st.names == (:X, :Y, :test, :data)
@test sa.types == sds.types == st.types == (Symbol, Float64, Float64, Float64)

@test Tables.getcolumn(t, 1) == Tables.getcolumn(t, :X) == Tables.getcolumn(t, X) ==
Tables.getcolumn(ds, 1) == Tables.getcolumn(ds, :X) == Tables.getcolumn(ds, X) ==
Tables.getcolumn(da, 1) == Tables.getcolumn(da, :X) == Tables.getcolumn(da, X) ==
Tables.getcolumn(da, 1)[:] == repeat([:a, :b, :c], 6)
@test Tables.getcolumn(t, 2) == Tables.getcolumn(t, :Y) ==
Tables.getcolumn(da, 2) == Tables.getcolumn(da, :Y) ==
Tables.getcolumn(ds, 2) == Tables.getcolumn(ds, :Y) ==
Tables.getcolumn(ds, 2)[:] == repeat([10.0, 10.0, 10.0, 20.0, 20.0, 20.0], 3)
@test Tables.getcolumn(t, 3) == Tables.getcolumn(t, :test) ==
Tables.getcolumn(da, 3) == Tables.getcolumn(da, :test) ==
Tables.getcolumn(ds, 3) == Tables.getcolumn(ds, :test) ==
Tables.getcolumn(ds, 3)[:] == vcat(repeat([1.0], 6), repeat([2.0], 6), repeat([3.0], 6))
@test Tables.getcolumn(t, 4) == Tables.getcolumn(t, :data) ==
Tables.getcolumn(da, 4) == Tables.getcolumn(da, :data) ==
Tables.getcolumn(ds, 4) == Tables.getcolumn(ds, :data) ==
Tables.getcolumn(ds, 4)[:] == ones(3 * 2 * 3)
@test Tables.getcolumn(t, Float64, 4, :data) == ones(3 * 2 * 3)
@test Tables.getcolumn(t, Float64, 2, :Y) == Tables.getcolumn(da, Float64, 2, :Y) ==
Tables.getcolumn(ds, Float64, 2, :Y) ==
Tables.getcolumn(ds, Float64, 2, :Y)[:] == repeat([10.0, 10.0, 10.0, 20.0, 20.0, 20.0], 3)
@test_throws ArgumentError Tables.getcolumn(t, :NotAColumn)
@test_throws BoundsError Tables.getcolumn(t, 5)
# Run the Tables-interface checks twice: once without reference dimensions
# and once with `z` supplied as a reference dimension.
@testset for dim_ref in ((), (z,))
# Names of the reference dimensions; splatted into the expected column names below.
ref_names = name(dim_ref)
# NOTE(review): `ref_num` is assigned here but not referenced again in this block.
ref_num = length(dim_ref)
# Product of the reference-dimension lengths; `init=1` covers the empty tuple.
ref_size = prod(length, dim_ref; init=1)
da = DimArray(ones(3, 2, 3), dimz; name=:data, refdims=dim_ref)

# Expected table shape: array size multiplied by the reference-dimension size.
nrows = prod(size(da)) * ref_size
col_names = (:X, :Y, :test, ref_names..., :data)
col_names_no_ref = (:X, :Y, :test, :data)
col_eltypes = (Symbol, Float64, Float64, map(eltype, dim_ref)..., Float64)
col_eltypes_no_ref = (Symbol, Float64, Float64, Float64)
# Expected dimension columns: take the product over `dimz` and `dim_ref`,
# then pull out the i-th component of every tuple.
dim_vals = vec(collect(Iterators.product(dimz..., dim_ref...)))
col_vals = [getindex.(dim_vals, i) for i in eachindex(first(dim_vals))]
# The data column is all ones, because `da` was built from `ones`.
push!(col_vals, ones(nrows))

ds = DimStack(da)
t = DimTable(ds; refdims=dim_ref)
@test t isa DimTable
# `dims(t)` is compared against the array's own dims, i.e. without `dim_ref`.
@test dims(t) === dims(da)
@test parent(t) === ds
# `Tables.columns` on the stack is called without the `refdims` keyword, so its
# column names are only compared with those of `t` when `dim_ref` is empty.
t2 = Tables.columns(ds)
@test t2 isa DimTable
if isempty(dim_ref)
@test Tables.columnnames(t2) == Tables.columnnames(t)
end

@test Tables.columns(t) === t
@test length(t[:X]) == length(t[:Y]) == length(t[:test]) == nrows

@test Tables.istable(typeof(t)) == Tables.istable(t) ==
Tables.istable(typeof(da)) == Tables.istable(da) ==
Tables.istable(typeof(ds)) == Tables.istable(ds) == true
@test Tables.columnaccess(t) == Tables.columnaccess(da) ==
Tables.columnaccess(ds) == true
# NOTE(review): `ds` appears twice in the next check. The neighbouring checks compare
# `t`, `da` and `ds`, so one operand was presumably meant to be `da` — confirm.
@test Tables.rowaccess(t) == Tables.rowaccess(ds) == Tables.rowaccess(ds) == false
@test Tables.columnnames(t) == col_names

alldims = combinedims(dims(ds), dim_ref)
# Pad with `nothing` so every column name is paired with either its dimension or
# `nothing` (the non-dimension columns).
col_dims = (alldims..., fill(nothing, length(col_names) - length(alldims))...)
# Each column must be reachable by index and by name, and by dimension when it has one.
# NOTE(review): `col_eltype` is destructured but not used in the loop body.
@testset for (i, (col, dim, col_eltype)) in enumerate(
zip(col_names, col_dims, col_eltypes),
)
col_val = Tables.getcolumn(t, i)
@test col_val == Tables.getcolumn(t, col) == col_vals[i]

if !isnothing(dim)
@test col_val == Tables.getcolumn(t, dim)
end
end
@test_throws ArgumentError Tables.getcolumn(t, :NotAColumn)
@test_throws BoundsError Tables.getcolumn(t, length(col_names) + 1)

sa = Tables.schema(da)
sds = Tables.schema(ds)
# NOTE(review): `st` is assigned here but not referenced again in this block.
st = Tables.schema(t)

@testset "consistency of DimStack and DimArray Tables interfaces" begin
@test Tables.columnnames(da) == Tables.columnnames(ds) == sa.names == sds.names == col_names_no_ref
@test sa.types == sds.types == col_eltypes_no_ref
@test Tables.columntable(da) == Tables.columntable(ds)
end

# The remaining checks compare `t` with the array/stack interfaces directly, so they
# are only run for the iteration without reference dimensions.
isempty(dim_ref) || continue
@testset "DimTable interface with no refdims consistent with DimStack/DimArray Tables interfaces" begin
@test sa.names == col_names
@test sa.types == col_eltypes
@test Tables.columntable(da) == Tables.columntable(t)
@testset for (i, (col, dim, col_eltype)) in enumerate(
zip(col_names, col_dims, col_eltypes),
)
@test col_vals[i] == Tables.getcolumn(da, col) == Tables.getcolumn(ds, col) ==
Tables.getcolumn(da, i) == Tables.getcolumn(ds, i)

if !isnothing(dim)
@test col_vals[i] == Tables.getcolumn(da, dim) ==
Tables.getcolumn(ds, dim) == Tables.getcolumn(da, typeof(dim)) ==
Tables.getcolumn(ds, typeof(dim))
end
end
end
end
end

da = DimArray(ones(3, 2, 3), dimz; name=:data)
@testset "DimArray TableTraits interface" begin
ds = DimStack(da)
t = DimTable(ds)
Expand Down
Loading