Skip to content

Commit

Permalink
Improve loadtime by removing LazyArray (and be ready for julia 1.9) (#…
Browse files Browse the repository at this point in the history
…189)

* change lazy vcat backend

* use SentinelVector

* funky allocation in old versions
  • Loading branch information
Moelf authored Oct 20, 2022
1 parent f6819e6 commit c025365
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 19 deletions.
10 changes: 4 additions & 6 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "UnROOT"
uuid = "3cd96dde-e98d-4713-81e9-a4a1b0235ce9"
authors = ["Tamas Gal", "Jerry Ling", "Johannes Schumann", "Nick Amin"]
version = "0.8.16"
version = "0.8.17"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand All @@ -12,14 +12,14 @@ CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637"
LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
LibDeflate = "9255714d-24a7-4b30-8ea3-d46a97f7e13b"
LorentzVectors = "3f54b04b-17fc-5cd4-9758-90c048d965e3"
Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73"
Mixers = "2a8e4939-dab8-5edc-8f64-72a8776f13de"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
xrootdgo_jll = "9d84c17e-11f2-50ef-8cc9-e9701362097f"
Expand All @@ -33,14 +33,13 @@ CodecZstd = "^0.6.0, ^0.7.0"
HTTP = "^0.9.7, 1"
IterTools = "^1"
LRUCache = "^1.3.0"
LazyArrays = "^0.21, ^0.22, ^1"
LibDeflate = "^0.4.1"
LorentzVectors = "^0.4.0"
Memoization = "^0.1.10"
Mixers = "^0.1.0"
Parameters = "^0.12.0"
Polyester = "^0.5.3"
PrettyTables = "2"
SentinelArrays = "^1.3"
StaticArrays = "^0.12.0, ^1"
Tables = "^1.0.0"
julia = "^1.6"
Expand All @@ -50,9 +49,8 @@ xrootdgo_jll = "^0.31.1"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
MD5 = "6ac74813-4b46-53a4-afec-0b5dc9d7885c"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
ThreadsX = "ac1d9e8a-700a-412c-b207-f0111f4b6c0d"

[targets]
test = ["Test", "Pkg", "ThreadsX", "MD5", "InteractiveUtils", "Polyester"]
test = ["Test", "Pkg", "ThreadsX", "MD5", "InteractiveUtils"]
10 changes: 9 additions & 1 deletion src/UnROOT.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module UnROOT

using LazyArrays
import SentinelArrays: ChainedVector
import Mmap: mmap
export ROOTFile, LazyBranch, LazyTree

Expand Down Expand Up @@ -44,4 +44,12 @@ include("iteration.jl")
include("custom.jl")
include("displays.jl")

# Top-level code in the module body, compiled in only on Julia 1.9 and newer:
# build a `LazyTree` for tree "t1" of the file returned by `UnROOT.samplefile(...)`,
# then `show` the tree and its element at index 1 into `devnull`, discarding the output.
# NOTE(review): presumably a precompilation workload so the construction and display
# code paths get cached (the commit title mentions load time and Julia 1.9) — confirm.
@static if VERSION >= v"1.9"
# `let` keeps `t` local so it does not become a module-level global.
let
t = LazyTree(UnROOT.samplefile("tree_with_jagged_array.root"), "t1")
show(devnull, t)
show(devnull, t[1])
end
end

end # module
44 changes: 37 additions & 7 deletions src/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -250,17 +250,47 @@ Base.length(lt::LazyTree) = length(first(Tables.columns(lt)))
Base.ndims(::Type{<:LazyTree}) = 1
# The size of a `LazyTree` is taken from its first column.
Base.size(lt::LazyTree) = size(first(Tables.columns(lt))) # all columns have the same size

function LazyArrays.Vcat(ts::LazyTree...)
"""
chaintrees(ts)
Chain a collection of `LazyTree`s together to form a larger tree. Every tree should
have identical branch names and types; we're not trying to re-implement SQL here.
## Example
```julia
julia> typeof(tree)
LazyTree with 1 branches:
a
julia> tree2 = UnROOT.chaintrees([tree,tree]);
julia> eltype(tree.a) == eltype(tree2.a)
true
julia> length(tree)
100
julia> length(tree2)
200
julia> eltype(tree)
UnROOT.LazyEvent{NamedTuple{(:a,), Tuple{LazyBranch{Int32, UnROOT.Nojagg, Vector{Int32}}}}}
julia> eltype(tree2)
UnROOT.LazyEvent{NamedTuple{(:a,), Tuple{SentinelArrays.ChainedVector{Int32, LazyBranch{Int32, UnROOT.Nojagg, Vector{Int32}}}}}}
```
"""
function chaintrees(ts)
branch_names = propertynames(first(ts))
res_branches = map(branch_names) do bname
LazyArrays.Vcat(getproperty.(ts, bname)...)
ChainedVector(getproperty.(ts, bname))
end
LazyTree(NamedTuple{branch_names}(res_branches))
end
Base.vcat(ts::LazyTree...) = Vcat(ts...)
Base.reduce(::typeof(vcat), ts::AbstractVector{<:LazyTree}) = Vcat((ts)...)
Base.mapreduce(f, ::typeof(vcat), ts::Vector{<:LazyTree}) = Vcat(f.(ts)...)
Base.mapreduce(f, ::typeof(Vcat), ts::Vector{<:LazyTree}) = Vcat(f.(ts)...)

# `vcat(t1, t2, ...)` on `LazyTree`s: collect the varargs into a Vector and pass it to `chaintrees`.
Base.vcat(ts::LazyTree...) = chaintrees(collect(ts))
# `reduce(vcat, trees)` on a vector of `LazyTree`s: forward the vector to `chaintrees` as-is.
Base.reduce(::typeof(vcat), ts::AbstractVector{<:LazyTree}) = chaintrees(ts)

function getbranchnamesrecursive(obj)
out = Vector{String}()
Expand Down Expand Up @@ -372,7 +402,7 @@ function Base.getindex(ba::LazyBranch{T,J,B}, range::UnitRange) where {T,J,B}
ib2 = findfirst(x -> x > (last(range) - 1), ba.fEntry) - 1
offset = ba.fEntry[ib1]
range = (first(range)-offset):(last(range)-offset)
return Vcat(asyncmap(i->basketarray(ba, i), ib1:ib2)...)[range]
return ChainedVector(asyncmap(i->basketarray(ba, i), ib1:ib2))[range]
end

_clusterranges(t::LazyTree) = _clusterranges([getproperty(t,p) for p in propertynames(t)])
Expand Down
15 changes: 10 additions & 5 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using StaticArrays
using InteractiveUtils
using MD5

using ThreadsX, Polyester
using ThreadsX

const SAMPLES_DIR = joinpath(@__DIR__, "samples")

Expand Down Expand Up @@ -719,10 +719,12 @@ t = LazyTree(ROOTFile(joinpath(SAMPLES_DIR, "NanoAODv5_sample.root")), "Events",

nmus .= 0
t_dummy = LazyTree(ROOTFile(joinpath(SAMPLES_DIR, "NanoAODv5_sample.root")), "Events", ["Muon_pt"])
@batch for evt in vcat(t,t_dummy) # avoid using the same underlying file handler
chained_tree = vcat(t,t_dummy)
Threads.@threads for evt in chained_tree # avoid using the same underlying file handler
nmus[Threads.threadid()] += length(evt.Muon_pt)
end
@test sum(nmus) == 2*878
@test mapreduce(length, +, [t,t_dummy]) == length(t) + length(t_dummy)

for j in 1:3
inds = [Vector{Int}() for _ in 1:nthreads]
Expand Down Expand Up @@ -796,11 +798,14 @@ end
@test sum(UnROOT._clusterbytes([t.b2]; compressed=true)) == 23710.0 # same as uproot4
end

@testset "Vcat/chaining" begin
@testset "vcat/chaining" begin
rootfile = ROOTFile(joinpath(SAMPLES_DIR, "NanoAODv5_sample.root"))
t = LazyTree(rootfile, "Events", ["nMuon", "Muon_pt"])
tt = vcat(t,t)
@test (@allocated vcat(t,t)) < 1000
tt = UnROOT.chaintrees([t,t])
@test all(vcat(t, t).Muon_pt .== tt.Muon_pt)
@static if VERSION >= v"1.7"
@test (@allocated UnROOT.chaintrees([t,t])) < 1000
end
@test length(tt) == 2*length(t)
s1 = sum(t.nMuon)
s2 = sum(tt.nMuon)
Expand Down

2 comments on commit c025365

@Moelf
Copy link
Member Author

@Moelf Moelf commented on c025365 Oct 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/70716

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.8.17 -m "<description of version>" c0253652cf6da200b84172ec1cb57d0de70b55e5
git push origin v0.8.17

Please sign in to comment.