Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
name = "Statistics"
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Intentional?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. It's a hack to make sure julia knows to load this folder, it's described here for Pkg.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normally the Travis script does that automatically, so you can revert this: https://github.com/JuliaLang/Statistics.jl/blob/master/.travis.yml#L24

Though you need it to run tests locally.


[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand Down
60 changes: 57 additions & 3 deletions src/Statistics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ unscaled_covzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int) =
(vardim == 1 ? *(transpose(x), _conj(y)) : *(x, adjoint(y)))

# covzm (with centered data)

covzm(itr::Any; corrected::Bool = true) = covzm(collect(itr); corrected = corrected)
covzm(x::AbstractVector; corrected::Bool=true) = unscaled_covzm(x) / (length(x) - Int(corrected))
function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
C = unscaled_covzm(x, vardim)
Expand All @@ -504,6 +504,7 @@ function covzm(x::AbstractMatrix, vardim::Int=1; corrected::Bool=true)
A .= A .* b
return A
end
covzm(x::Any, y::Any; corrected::Bool = true) = covzm(collect(x), collect(y); corrected = corrected)
covzm(x::AbstractVector, y::AbstractVector; corrected::Bool=true) =
unscaled_covzm(x, y) / (length(x) - Int(corrected))
function covzm(x::AbstractVecOrMat, y::AbstractVecOrMat, vardim::Int=1; corrected::Bool=true)
Expand All @@ -518,16 +519,32 @@ end
# covm (with provided mean)
## Use map(t -> t - xmean, x) instead of x .- xmean to allow for Vector{Vector}
## which can't be handled by broadcast
covm(itr::Any, itrmean; corrected::Bool=true) =
@show covm(collect(itr), itrmean; corrected=corrected)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@show covm(collect(itr), itrmean; corrected=corrected)
covm(collect(itr), itrmean; corrected=corrected)

covm(x::AbstractVector, xmean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x); corrected=corrected)
covm(x::AbstractMatrix, xmean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, vardim; corrected=corrected)
covm(x::Any, xmean, y::Any, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVector, xmean, y::AbstractVector, ymean; corrected::Bool=true) =
covzm(map(t -> t - xmean, x), map(t -> t - ymean, y); corrected=corrected)
covm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1; corrected::Bool=true) =
covzm(x .- xmean, y .- ymean, vardim; corrected=corrected)

# cov (API)
"""
cov(itr::Any; corrected::Bool=true)

Compute the variance of the iterator `itr`. If `corrected` is `true` (the default) then the sum
is scaled with `n-1`, whereas the sum is scaled with `n` if `corrected` is `false` where
`n = length(collect(itr))`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
`n = length(collect(itr))`.
``n`` is the number of elements.

"""
function cov(itr::Any; corrected::Bool=true)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we want to allow 0 or more than 2 dimensional arrays here?

x = collect(itr)
covm(x, mean(x); corrected=corrected)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better call covzm directly to avoid an additional copy:

Suggested change
covm(x, mean(x); corrected=corrected)
covzm(map!(t -> t - xmean, x, x); corrected=corrected)

Same for the two-argument method.

end

"""
cov(x::AbstractVector; corrected::Bool=true)

Expand All @@ -546,6 +563,22 @@ if `corrected` is `false` where `n = size(X, dims)`.
cov(X::AbstractMatrix; dims::Int=1, corrected::Bool=true) =
covm(X, _vmean(X, dims), dims; corrected=corrected)


"""
cov(x::Any, y::Any; corrected::Bool=true)

Compute the covariance between the iterators `x` and `y`. If `corrected` is `true` (the
default), computes ``\\frac{1}{n-1}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*`` where
``*`` denotes the complex conjugate and `n = length(collect(x)) = length(collect(y))`. If `corrected` is
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
``*`` denotes the complex conjugate and `n = length(collect(x)) = length(collect(y))`. If `corrected` is
``*`` denotes the complex conjugate and ``n`` the number of elements. If `corrected` is

`false`, computes ``\\frac{1}{n}\\sum_{i=1}^n (x_i-\\bar x) (y_i-\\bar y)^*``.
"""
function cov(x::Any, y::Any; corrected::Bool=true)
cx = collect(x)
cy = collect(y)

covm(cx, mean(cx), cy, mean(cy); corrected=corrected)
end

"""
cov(x::AbstractVector, y::AbstractVector; corrected::Bool=true)

Expand Down Expand Up @@ -630,7 +663,7 @@ function cov2cor!(C::AbstractMatrix, xsd::AbstractArray, ysd::AbstractArray)
end

# corzm (non-exported, with centered data)

corzm(x::Any) = corzm(collect(x))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same remark here and for corm as for cor about using the eltype when it's known.

corzm(x::AbstractVector{T}) where {T} = one(real(T))
function corzm(x::AbstractMatrix, vardim::Int=1)
c = unscaled_covzm(x, vardim)
Expand All @@ -644,9 +677,10 @@ corzm(x::AbstractMatrix, y::AbstractMatrix, vardim::Int=1) =
cov2cor!(unscaled_covzm(x, y, vardim), sqrt!(sum(abs2, x, dims=vardim)), sqrt!(sum(abs2, y, dims=vardim)))

# corm

corm(x::Any, xmean) = corm(collect(x), xmean)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also apply the eltype check here.

corm(x::AbstractVector{T}, xmean) where {T} = one(real(T))
corm(x::AbstractMatrix, xmean, vardim::Int=1) = corzm(x .- xmean, vardim)
corm(x::Any, mx, y::Any, my) = corm(collect(x), mx, collect(y), my)
function corm(x::AbstractVector, mx, y::AbstractVector, my)
require_one_based_indexing(x, y)
n = length(x)
Expand Down Expand Up @@ -674,6 +708,14 @@ corm(x::AbstractVecOrMat, xmean, y::AbstractVecOrMat, ymean, vardim::Int=1) =
corzm(x .- xmean, y .- ymean, vardim)

# cor
"""
cor(itr::Any)

Return the number one.
"""
cor(itr::Any) = one(real(eltype(collect(itr))))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better first check whether Base.IteratorEltype(itr) isa Base.HasEltype && isconcrete(eltype(itr)), and in that case avoid calling collect.

Also remove the docstring for AbstractVector below, which is just a special case of this one.



"""
cor(x::AbstractVector)

Expand All @@ -688,6 +730,18 @@ Compute the Pearson correlation matrix of the matrix `X` along the dimension `di
"""
cor(X::AbstractMatrix; dims::Int=1) = corm(X, _vmean(X, dims), dims)

"""
cor(x::AbstractVector, y::AbstractVector)

Compute the Pearson correlation between the vectors `x` and `y`.
"""
function cor(x::Any, y::Any)
cx = collect(x)
cy = collect(y)

corm(cx, mean(cx), cy, mean(cy))
end

"""
cor(x::AbstractVector, y::AbstractVector)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove this docstring which is a special case of the previous one.


Expand Down
17 changes: 14 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -339,11 +339,16 @@ Y = [6.0 2.0;
x1 = vec(X[1,:])
y1 = vec(Y[1,:])
end
@show x1
x1_itr = (x1i for x1i in x1)
y1_itr = skipmissing(y1)

c = zm ? Statistics.covm(x1, 0, corrected=cr) :
cov(x1, corrected=cr)
c_itr = zm ? Statistics.covm(x1_itr, 0, corrected=cr) :
cov(x1_itr, corrected=cr)
@test isa(c, Float64)
@test c ≈ Cxx[1,1]
@test c ≈ c_itr ≈ Cxx[1,1]
@inferred cov(x1, corrected=cr)

@test cov(X) == Statistics.covm(X, mean(X, dims=1))
Expand All @@ -356,6 +361,8 @@ Y = [6.0 2.0;
@test cov(x1, y1) == Statistics.covm(x1, mean(x1), y1, mean(y1))
c = zm ? Statistics.covm(x1, 0, y1, 0, corrected=cr) :
cov(x1, y1, corrected=cr)
c_itr = zm ? Statistics.covm(x1_itr, 0, y1_itr, 0, corrected=cr) :
cov(x1_itr, y1_itr, corrected=cr)
@test isa(c, Float64)
@test c ≈ Cxy[1,1]
@inferred cov(x1, y1, corrected=cr)
Expand Down Expand Up @@ -426,10 +433,13 @@ end
x1 = vec(X[1,:])
y1 = vec(Y[1,:])
end
x1_itr = (x1i for x1i in x1)
y1_itr = skipmissing(y1)

c = zm ? Statistics.corm(x1, 0) : cor(x1)
c_itr = zm ? Statistics.corm(x1_itr, 0) : cor(x1_itr)
@test isa(c, Float64)
@test c ≈ Cxx[1,1]
@test c ≈ c_itr ≈ Cxx[1,1]
@inferred cor(x1)

@test cor(X) == Statistics.corm(X, mean(X, dims=1))
Expand All @@ -440,8 +450,9 @@ end

@test cor(x1, y1) == Statistics.corm(x1, mean(x1), y1, mean(y1))
c = zm ? Statistics.corm(x1, 0, y1, 0) : cor(x1, y1)
c_itr = zm ? Statistics.corm(x1_itr, 0, y1_itr, 0) : cor(x1_itr, y1_itr)
@test isa(c, Float64)
@test c ≈ Cxy[1,1]
@test c ≈ c_itr ≈ Cxy[1,1]
@inferred cor(x1, y1)

if vd == 1
Expand Down