@@ -333,11 +333,17 @@ function find_breaks(v::AbstractVector, qs::AbstractVector)
333333 return breaks
334334end
335335
336+ # Fallback for `weights` that are not `AbstractWeights`: always throws an `ArgumentError`.
337+ # The `AbstractWeights` method is defined in the StatsBase extension (there is no in-place weighted quantile method in StatsBase).
338+ _wquantile (x:: AbstractArray , w:: AbstractVector , p:: AbstractVector ) =
339+ throw (ArgumentError (" `weights` must be an `AbstractWeights` vector from StatsBase.jl" ))
340+
336341"""
337342 cut(x::AbstractArray, ngroups::Integer;
338343 labels::Union{AbstractVector{<:AbstractString},Function},
339344 sigdigits::Integer=3,
340- allowempty::Bool=false)
345+ allowempty::Bool=false,
346+ weights::Union{AbstractWeights, Nothing}=nothing)
341347
342348Cut a numeric array into `ngroups` quantiles.
343349
@@ -369,19 +375,41 @@ quantiles.
369375 other than the last one are equal, generating empty intervals;
370376 when `true`, duplicate breaks are allowed and the intervals they generate are kept as
371377 unused levels (but duplicate labels are not allowed).
378+ * `weights::Union{AbstractWeights, Nothing}=nothing`: observation weights to use when
379+ computing quantiles (see `quantile` documentation in StatsBase).
372380"""
373381function cut (x:: AbstractArray , ngroups:: Integer ;
374382 labels:: Union{AbstractVector{<:SupportedTypes},Function,Nothing} = nothing ,
375383 sigdigits:: Integer = 3 ,
376- allowempty:: Bool = false )
384+ allowempty:: Bool = false ,
385+ weights:: Union{AbstractVector, Nothing} = nothing )
377386 ngroups >= 1 || throw (ArgumentError (" ngroups must be strictly positive (got $ngroups )" ))
378- sorted_x = eltype (x) >: Missing ? sort! (collect (skipmissing (x))) : sort (x)
379- min_x, max_x = first (sorted_x), last (sorted_x)
380- if (min_x isa Number && isnan (min_x)) ||
381- (max_x isa Number && isnan (max_x))
382- throw (ArgumentError (" NaN values are not allowed in input vector" ))
387+ if weights === nothing
388+ sorted_x = eltype (x) >: Missing ? sort! (collect (skipmissing (x))) : sort (x)
389+ min_x, max_x = first (sorted_x), last (sorted_x)
390+ if (min_x isa Number && isnan (min_x)) ||
391+ (max_x isa Number && isnan (max_x))
392+ throw (ArgumentError (" NaN values are not allowed in input vector" ))
393+ end
394+ qs = quantile! (sorted_x, (1 : (ngroups- 1 ))/ ngroups, sorted= true )
395+ else
396+ if eltype (x) >: Missing
397+ nm_inds = findall (! ismissing, x)
398+ nm_x = view (x, nm_inds)
399+ # TODO : use a view once this is supported (JuliaStats/StatsBase.jl#723)
400+ nm_weights = weights[nm_inds]
401+ else
402+ nm_x = x
403+ nm_weights = weights
404+ end
405+ sorted_x = sort (nm_x)
406+ min_x, max_x = first (sorted_x), last (sorted_x)
407+ if (min_x isa Number && isnan (min_x)) ||
408+ (max_x isa Number && isnan (max_x))
409+ throw (ArgumentError (" NaN values are not allowed in input vector" ))
410+ end
411+ qs = _wquantile (nm_x, nm_weights, (1 : (ngroups- 1 ))/ ngroups)
383412 end
384- qs = quantile! (sorted_x, (1 : (ngroups- 1 ))/ ngroups, sorted= true )
385413 breaks = [min_x; find_breaks (sorted_x, qs); max_x]
386414 if ! allowempty && ! allunique (@view breaks[1 : end - 1 ])
387415 throw (ArgumentError (" cannot compute $ngroups quantiles due to " *
0 commit comments