Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix Vector{Bool} branches and factorize interped_data #95

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/UnROOT.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import AbstractTrees: children, printnode, print_tree
using CodecZlib, CodecLz4, CodecXz, CodecZstd, StaticArrays, LorentzVectors, ArraysOfArrays
using Mixers, Parameters, Memoization, LRUCache

import Tables, TypedTables, PrettyTables, DataFrames
import Tables, TypedTables, PrettyTables

@static if VERSION < v"1.6"
Base.first(a::AbstractVector{S}, n::Integer) where S<: AbstractString = a[1:(length(a) > n ? n : end)]
Expand Down
53 changes: 20 additions & 33 deletions src/custom.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ function interped_data(rawdata, rawoffsets, ::Type{Vector{LVF64}}, ::Type{Offset
offset .+= 1
VectorOfVectors(real_data, offset)
end
function interped_data(rawdata, rawoffsets, ::Type{LVF64}, ::Type{J}) where {T, J <: JaggType}
function interped_data(rawdata, rawoffsets, ::Type{LVF64}, ::Type{Nojagg})
# even with rawoffsets, we know each TLV is destined to be 64 bytes
[
reinterpret(LVF64, x) for x in Base.Iterators.partition(rawdata, 64)
Expand All @@ -82,48 +82,32 @@ end
# TLorentzVector ends

# KM3NeT
struct KM3NETDAQHit <: CustomROOTStruct
struct _KM3NETDAQHit <: CustomROOTStruct
dom_id::Int32
channel_id::UInt8
tdc::Int32
tot::UInt8
end
function readtype(io::IO, T::Type{KM3NETDAQHit})
function readtype(io::IO, T::Type{_KM3NETDAQHit})
T(readtype(io, Int32), read(io, UInt8), read(io, Int32), read(io, UInt8))
end
function interped_data(rawdata, rawoffsets, ::Type{Vector{KM3NETDAQHit}}, ::Type{J}) where {T, J <: UnROOT.JaggType}
UnROOT.splitup(rawdata, rawoffsets, KM3NETDAQHit, skipbytes=10)
function interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQHit}}, ::Type{Nojagg})
UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQHit, skipbytes=10)
end


# Experimental implementation for maximum performance (using reinterpret)
primitive type DAQHit 80 end
function Base.getproperty(hit::DAQHit, s::Symbol)
r = Ref(hit)
GC.@preserve r begin
if s === :dom_id
return ntoh(unsafe_load(Ptr{Int32}(Base.unsafe_convert(Ptr{Cvoid}, r))))
elseif s === :channel_id
return unsafe_load(Ptr{UInt8}(Base.unsafe_convert(Ptr{Cvoid}, r)+4))
elseif s === :tdc
return unsafe_load(Ptr{UInt32}(Base.unsafe_convert(Ptr{Cvoid}, r)+5))
elseif s === :tot
return unsafe_load(Ptr{UInt8}(Base.unsafe_convert(Ptr{Cvoid}, r)+9))
end
end
error("unknown field $s of type $(typeof(hit))")
function interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQHit}}, ::Type{Offsetjagg})
UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQHit, skipbytes=10)
end
Base.show(io::IO, h::DAQHit) = print(io, "DAQHit(", h.dom_id, ',', h.channel_id, ',', h.tdc, ',', h.tot, ')')


struct KM3NETDAQTriggeredHit
struct _KM3NETDAQTriggeredHit
dom_id::Int32
channel_id::UInt8
tdc::Int32
tot::UInt8
trigger_mask::UInt64
end
function readtype(io::IO, T::Type{KM3NETDAQTriggeredHit})
packedsizeof(::Type{_KM3NETDAQTriggeredHit}) = 24 # incl. cnt and vers
function readtype(io::IO, T::Type{_KM3NETDAQTriggeredHit})
dom_id = readtype(io, Int32)
channel_id = read(io, UInt8)
tdc = read(io, Int32)
Expand All @@ -133,11 +117,14 @@ function readtype(io::IO, T::Type{KM3NETDAQTriggeredHit})
T(dom_id, channel_id, tdc, tot, trigger_mask)
end

function UnROOT.interped_data(rawdata, rawoffsets, ::Type{Vector{KM3NETDAQTriggeredHit}}, ::Type{J}) where {T, J <: UnROOT.JaggType}
UnROOT.splitup(rawdata, rawoffsets, KM3NETDAQTriggeredHit, skipbytes=10)
function UnROOT.interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQTriggeredHit}}, ::Type{Nojagg})
UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQTriggeredHit, skipbytes=10)
end
function UnROOT.interped_data(rawdata, rawoffsets, ::Type{Vector{_KM3NETDAQTriggeredHit}}, ::Type{Offsetjagg})
UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQTriggeredHit, skipbytes=10)
end

struct KM3NETDAQEventHeader
struct _KM3NETDAQEventHeader
detector_id::Int32
run::Int32
frame_index::Int32
Expand All @@ -147,9 +134,9 @@ struct KM3NETDAQEventHeader
trigger_mask::UInt64
overlays::UInt32
end
packedsizeof(::Type{KM3NETDAQEventHeader}) = 76
packedsizeof(::Type{_KM3NETDAQEventHeader}) = 76

function readtype(io::IO, T::Type{KM3NETDAQEventHeader})
function readtype(io::IO, T::Type{_KM3NETDAQEventHeader})
skip(io, 18)
detector_id = readtype(io, Int32)
run = readtype(io, Int32)
Expand All @@ -165,6 +152,6 @@ function readtype(io::IO, T::Type{KM3NETDAQEventHeader})
T(detector_id, run, frame_index, UTC_seconds, UTC_16nanosecondcycles, trigger_counter, trigger_mask, overlays)
end

function UnROOT.interped_data(rawdata, rawoffsets, ::Type{KM3NETDAQEventHeader}, ::Type{J}) where {T, J <: UnROOT.JaggType}
UnROOT.splitup(rawdata, rawoffsets, KM3NETDAQEventHeader, jagged=false)
function UnROOT.interped_data(rawdata, rawoffsets, ::Type{_KM3NETDAQEventHeader}, ::Type{Nojagg})
UnROOT.splitup(rawdata, rawoffsets, _KM3NETDAQEventHeader, jagged=false)
end
43 changes: 31 additions & 12 deletions src/displays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,34 @@ These functions are used to display a ROOTFile in a tree-like fashion
by using `AbstractTrees` printing functions. We customize what the children
of ROOTFile and a TTree is, and how to print the final `node`.
=#
"""
    TKeyNode(name, classname)

Lightweight display node carrying the name and class name of a key.
It is printed as `name (classname)` by `printnode`.

The fields are concrete `String`s so the struct is concretely typed; the
default constructor converts any `AbstractString` argument to `String`.
"""
struct TKeyNode
    name::String
    classname::String
end
function children(f::ROOTFile)
ch = Vector{TTree}()
# display TTrees recursively
# subsequent TTrees with duplicate fName will be skipped
# since TKey cycle number is guaranteed to be decreasing
# then all TKeys in the file which are not for a TTree
seen = Set{String}()
ch = Vector{Union{TTree,TKeyNode}}()
lock(f)
for k in keys(f)
lock(f.fobj)
try
push!(ch, f[k])
obj = f[k]
obj isa TTree || continue
obj.fName ∈ seen && continue
push!(ch, obj)
push!(seen, obj.fName)
catch
finally
unlock(f.fobj)
end
end
for tkey in f.directory.keys
kn = TKeyNode(tkey.fName, tkey.fClassName)
kn.classname == "TTree" && continue
push!(ch, kn)
end
unlock(f)
ch
end
function children(t::TTree)
Expand All @@ -26,15 +43,16 @@ function children(t::TTree)
return ks
end
end
printnode(io::IO, t::TTree) = print(io, t.fName)
printnode(io::IO, t::TTree) = print(io, "$(t.fName) (TTree)")
printnode(io::IO, f::ROOTFile) = print(io, f.filename)
printnode(io::IO, k::TKeyNode) = print(io, "$(k.name) ($(k.classname))")

function Base.show(io::IO, tree::LazyTree)
_hs = _make_header(tree)
_ds = displaysize(io)
PrettyTables.pretty_table(
io,
tree;
innertable(tree);
header=_hs,
alignment=:l,
vlines=[1],
Expand All @@ -44,22 +62,23 @@ function Base.show(io::IO, tree::LazyTree)
row_number_column_title="Row",
show_row_number=true,
compact_printing=false,
formatters=(v, i, j) -> _treeformat(v, _ds[2] ÷ min(5, length(_hs[1]))),
formatters=(v, i, j) -> _treeformat(v, _ds[2] ÷ min(8, length(_hs[1]))),
display_size=(min(_ds[1], 40), min(_ds[2], 160)),
)
end
"""
    _symtup2str(symtup, trunc=15)

Stringify every element of `symtup`, keep at most the first `trunc`
characters of each string, and `collect` the result.
"""
function _symtup2str(symtup, trunc=15)
    labels = string.(symtup)
    clipped = first.(labels, trunc)
    return collect(clipped)
end
function _make_header(t)
pn = propertynames(t)
header = _symtup2str(pn)
subheader = _symtup2str(Tables.columntype.(Ref(t), pn))
subheader = _symtup2str(Tables.columntype.(Ref(innertable(t)), pn))
(header, subheader)
end
function _treeformat(val, trunc)
s = if val isa Vector{T} where T<:Integer
s = if val isa AbstractArray{T} where T<:Integer
string(Int.(val))
elseif val isa Vector{T} where T<:AbstractFloat
string(round.(Float64.(val); sigdigits=3))
elseif val isa AbstractArray{T} where T<:AbstractFloat
T = eltype(val)
replace(string(round.(T.(val); sigdigits=3)), string(T)=>"")
else
string(val)
end
Expand Down
45 changes: 18 additions & 27 deletions src/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Reads all branches from a tree.
"""
function arrays(f::ROOTFile, treename)
names = keys(f[treename])
res = Vector{Any}(undef, length(names))
res = Vector{Vector}(undef, length(names))
Threads.@threads for i in eachindex(names)
res[i] = array(f, "$treename/$(names[i])")
end
Expand Down Expand Up @@ -122,12 +122,12 @@ Base.eltype(ba::LazyBranch{T,J,B}) where {T,J,B} = T

function Base.show(io::IO, lb::LazyBranch)
summary(io, lb)
println(":")
println(" File: $(lb.f.filename)")
println(" Branch: $(lb.b.fName)")
println(" Description: $(lb.b.fTitle)")
println(" NumEntry: $(lb.L)")
print(" Entry Type: $(eltype(lb))")
println(io, ":")
println(io, " File: $(lb.f.filename)")
println(io, " Branch: $(lb.b.fName)")
println(io, " Description: $(lb.b.fTitle)")
println(io, " NumEntry: $(lb.L)")
print(io, " Entry Type: $(eltype(lb))")
nothing
end

Expand Down Expand Up @@ -166,27 +166,24 @@ function Base.iterate(ba::LazyBranch{T,J,B}, idx=1) where {T,J,B}
return (ba[idx], idx + 1)
end

const _LazyTreeType =
TypedTables.Table{<:NamedTuple,1,NamedTuple{S,N}} where {S,N<:Tuple{Vararg{LazyBranch}}}

struct LazyTree{T} <: DataFrames.AbstractDataFrame
struct LazyTree{T}
treetable::T
colidx::DataFrames.Index
end

# The wrapped table is read with `Core.getfield` because `getproperty` on a
# `LazyTree` is forwarded to that table (see the method below), so the
# `treetable` field is not reachable through ordinary dot access.
@inline function innertable(t::LazyTree)
    return Core.getfield(t, :treetable)
end

# Property names and property lookups are delegated to the wrapped table.
function Base.propertynames(lt::LazyTree)
    return propertynames(innertable(lt))
end
function Base.getproperty(lt::LazyTree, s::Symbol)
    table = innertable(lt)
    return getproperty(table, s)
end

# a specific branch
Base.getindex(lt::LazyTree, row::Int) = innertable(lt)[row]
function Base.getindex(lt::LazyTree, rang::UnitRange)
return LazyTree(innertable(lt)[rang], Core.getfield(lt, :colidx))
return LazyTree(innertable(lt)[rang])
end
Base.getindex(lt::LazyTree, ::typeof(!), s::Symbol) = lt[:, s]
Base.getindex(lt::LazyTree, ::Colon, i::Int) = lt[:, propertynames(lt)[i]]
Base.getindex(lt::LazyTree, ::typeof(!), i::Int) = lt[:, propertynames(lt)[i]]
Base.getindex(lt::LazyTree, ::Colon, s::Symbol) = getproperty(innertable(lt), s) # the real deal

# a specific event
Base.getindex(lt::LazyTree, row::Int, col::Int) = lt[:, col][row]
Base.getindex(lt::LazyTree, row::Int, col::Symbol) = lt[:, col][row]
Base.getindex(lt::LazyTree, rows::UnitRange, col::Symbol) = lt[:, col][rows]
Base.getindex(lt::LazyTree, ::Colon) = lt[1:end]
Expand All @@ -200,13 +197,9 @@ Base.lastindex(e::Iterators.Enumerate{LazyTree{T}}) where T = lastindex(e.itr)
Base.eachindex(e::Iterators.Enumerate{LazyTree{T}}) where T = eachindex(e.itr)
Base.getindex(e::Iterators.Enumerate{LazyTree{T}}, row::Int) where T = (row, first(iterate(e.itr, row)))

# interfacing AbstractDataFrame
DataFrames._check_consistency(lt::LazyTree) = nothing #we're read-only
# interfacing Table
Base.names(lt::LazyTree) = collect(String.(propertynames(innertable(lt))))
DataFrames.index(lt::LazyTree) = Core.getfield(lt, :colidx)
DataFrames.ncol(lt::LazyTree) = length(DataFrames.index(lt))
Base.length(lt::LazyTree) = length(innertable(lt))
DataFrames.nrow(lt::LazyTree) = length(lt)

function getbranchnamesrecursive(obj)
out = Vector{String}()
Expand All @@ -223,7 +216,7 @@ end
LazyTree(f::ROOTFile, s::AbstractString, branch::Union{AbstractString, Regex})
LazyTree(f::ROOTFile, s::AbstractString, branches::Vector{Union{AbstractString, Regex}})

Constructor for `LazyTree`, which is close to an `AbstractDataFrame` (interface wise),
Constructor for `LazyTree`, which is close to a `DataFrame` (interface wise),
and a lazy `TypedTables.Table` (speed wise). Looping over a `LazyTree` is fast and type
stable. Internally, `LazyTree` contains a typed table whose branches are [`LazyBranch`](@ref).
This means that at any given time only `N` baskets are cached, where `N` is the number of branches.
Expand Down Expand Up @@ -251,16 +244,14 @@ function LazyTree(f::ROOTFile, s::AbstractString, branches)
@warn "Your tree is quite wide, with $(length(branches)) branches, this will take compiler a moment."
end
d = Dict{Symbol,LazyBranch}()
d_colidx = Dict{Symbol,Int}()
_m(s::AbstractString) = isequal(s)
_m(r::Regex) = Base.Fix1(occursin, r)
branches = mapreduce(b -> filter(_m(b), getbranchnamesrecursive(tree)), ∪, branches)
SB = Symbol.(branches)
for (i, b) in enumerate(SB)
for b in SB
d[b] = f["$s/$b"]
d_colidx[b] = i
end
return LazyTree(TypedTables.Table(d), DataFrames.Index(d_colidx, SB))
return LazyTree(TypedTables.Table(d))
end

function LazyTree(f::ROOTFile, s::AbstractString)
Expand All @@ -285,7 +276,7 @@ end
# Look up property `s` on the event's tree and return the entry at the event's
# index. The `tree` and `idx` fields are read with `Core.getfield` because this
# method itself overloads `getproperty` for `LazyEvent`. The lookup and the
# indexing both run under `@inbounds`.
function Base.getproperty(evt::LazyEvent, s::Symbol)
    @inbounds begin
        column = getproperty(Core.getfield(evt, :tree), s)
        entry = column[Core.getfield(evt, :idx)]
    end
    return entry
end
Base.collect(evt::LazyEvent) = Core.getfield(evt, :tree)[Core.getfield(evt, :idx)]
Base.collect(evt::LazyEvent) = @inbounds Core.getfield(evt, :tree)[Core.getfield(evt, :idx)]

function Base.iterate(tree::T, idx=1) where {T<:LazyTree}
idx > length(tree) && return nothing
Expand Down
Loading