Skip to content

Commit c06a707

Browse files
committed
use sort = nothing by default
1 parent 960138b commit c06a707

File tree

3 files changed

+15
-7
lines changed

3 files changed

+15
-7
lines changed

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "GroupedArrays"
22
uuid = "6407cd72-fade-4a84-8a1e-56e431fc1533"
33
authors = ["matthieugomez <[email protected]>"]
4-
version = "0.2.1"
4+
version = "0.2.2"
55

66
[deps]
77
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"

src/GroupedArrays.jl

+14-5
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,21 @@ Base.@propagate_inbounds function Base.setindex!(g::GroupedArray{T}, x::Missing,
3737
@boundscheck checkbounds(g, i)
3838
@inbounds g.refs[i] = 0
3939
end
40+
"""
41+
Constructor for GroupedArrays
4042
41-
# Constructor
42-
function GroupedArray(args...; coalesce = false)
43+
The GroupedArray constructor guarantees that every integer between 1 and ngroups (inclusive) is present in refs. However, this is not necessarily true afterwards (setindex! does not check that the replaced ref corresponds to the last one)
44+
45+
If coalesce = true, missing values are assigned an integer
46+
If sort = false, groups are created in order of appearance. If sort = true, groups are sorted. If sort = nothing, the fastest algorithm is used.
47+
"""
48+
function GroupedArray(args...; coalesce = false, sort = nothing)
4349
s = size(args[1])
4450
for x in args
4551
size(x) == s || throw(DimensionMismatch("cannot match array sizes"))
4652
end
4753
groups = Vector{Int}(undef, prod(s))
48-
ngroups, rhashes, gslots, sorted = row_group_slots(map(vec, args), Val(false), groups, !coalesce, false)
54+
ngroups, rhashes, gslots, sorted = row_group_slots(map(vec, args), Val(false), groups, !coalesce, sort)
4955
if !coalesce & any((eltype(x) >: Missing for x in args))
5056
T = Union{Int, Missing}
5157
else
@@ -78,8 +84,11 @@ end
7884
Base.allunique(x::GroupedRefPool) = true
7985

8086
DataAPI.refpool(g::GroupedArray{T}) where {T} = GroupedRefPool{T}(g.ngroups)
81-
# invrefpool is such that invrefpool[refpool[x]] = x. Basically, it gives the index in the pool (so the ref level) corresponding to each element of refpool
82-
# so it should be missing -> 0 and i -> i for 1 ≤ i ≤ g.ngroups
87+
# invrefpool is such that invrefpool[refpool[x]] = x.
88+
# In other words, it maps each element of refpool to its index in the pool
89+
# here, this gives
90+
# missing -> 0
91+
# i -> i for 1 ≤ i ≤ ngroups
8392
struct GroupedInvRefPool{T}
8493
ngroups::Int
8594
end

src/utils.jl

-1
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,6 @@ function row_group_slots(cols::NTuple{N, AbstractVector},
267267
# Computing neither hashes nor groups isn't very useful,
268268
# and this method needs to allocate a groups vector anyway
269269
@assert all(col -> length(col) == length(groups), cols)
270-
271270
missinginds = map(refpools) do refpool
272271
eltype(refpool) >: Missing ?
273272
something(findfirst(ismissing, refpool), lastindex(refpool)+1) : lastindex(refpool)+1

0 commit comments

Comments
 (0)