diff --git a/src/GenericLinearAlgebra.jl b/src/GenericLinearAlgebra.jl index c39aa32..0efd6a1 100644 --- a/src/GenericLinearAlgebra.jl +++ b/src/GenericLinearAlgebra.jl @@ -5,6 +5,7 @@ import LinearAlgebra: mul!, ldiv! include("juliaBLAS.jl") include("lapack.jl") include("cholesky.jl") +include("ldlt.jl") include("householder.jl") include("qr.jl") include("eigenSelfAdjoint.jl") diff --git a/src/juliaBLAS.jl b/src/juliaBLAS.jl index cb40e8b..fb75d4b 100644 --- a/src/juliaBLAS.jl +++ b/src/juliaBLAS.jl @@ -112,9 +112,9 @@ function rankUpdate!(C::Hermitian, A::StridedVecOrMat, α::Real) end ### Generic fallbacks -function lmul!(A::UpperTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Number,S} +function lmul!(A::UpperTriangular{T,S}, B::StridedVecOrMat{T}, α::T) where {T<:Number,S} AA = A.data - m, n = size(B) + m, n = size(B, 1), size(B, 2) for i ∈ 1:m for j ∈ 1:n B[i, j] = α * AA[i, i] * B[i, j] @@ -125,9 +125,9 @@ function lmul!(A::UpperTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Nu end return B end -function lmul!(A::LowerTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Number,S} +function lmul!(A::LowerTriangular{T,S}, B::StridedVecOrMat{T}, α::T) where {T<:Number,S} AA = A.data - m, n = size(B) + m, n = size(B, 1), size(B, 2) for i ∈ m:-1:1 for j ∈ 1:n B[i, j] = α * AA[i, i] * B[i, j] @@ -138,9 +138,9 @@ function lmul!(A::LowerTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Nu end return B end -function lmul!(A::UnitUpperTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Number,S} +function lmul!(A::UnitUpperTriangular{T,S}, B::StridedVecOrMat{T}, α::T) where {T<:Number,S} AA = A.data - m, n = size(B) + m, n = size(B, 1), size(B, 2) for i ∈ 1:m for j ∈ 1:n B[i, j] = α * B[i, j] @@ -151,9 +151,9 @@ function lmul!(A::UnitUpperTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T end return B end -function lmul!(A::UnitLowerTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Number,S} +function lmul!(A::UnitLowerTriangular{T,S}, B::StridedVecOrMat{T}, α::T) where {T<:Number,S} AA = A.data - m, n = size(B) + m, n = size(B, 1), size(B, 2) for i ∈ m:-1:1 for j ∈ 1:n B[i, j] = α * B[i, j] diff --git a/src/ldlt.jl b/src/ldlt.jl new file mode 100644 index 0000000..a5622a8 --- /dev/null +++ b/src/ldlt.jl @@ -0,0 +1,232 @@ +import Base: * + +# When the lower triangle contains the input it is actually LDLt +# Only the lower triangle of the input matrix A is referenced +# +# [α ] = [1 0 ] [δ 0 ] [1 l' ] = [δ δ*l' ] +# [a A₋] [l L₋] [0 D₋] [0 L₋'] [l*δ l*δ*l' + L₋*D₋*L₋'] +# +# so +# +# δ = α +# l = δ \ a +# +# followed by the recursion on the updated lower block +# +# L₋'*D₋*L₋' = A₋ - l*δ*l' +function _ldlt_lower!(A::StridedMatrix) + n = size(A, 2) + δ = A[1, 1] + for i in 2:n + lᵢ = A[i, 1] / δ + A[i, 1] = lᵢ + end + for j in 2:n + lⱼδ = A[j, 1] * δ + for i in j:n + A[i, j] -= A[i, 1] * lⱼδ' + end + end + if n > 2 + _ldlt_lower!(view(A, 2:n, 2:n)) + end + return A +end + +# When the upper triangle contains the input the factorization is actially UcDU +# Only the upper triangle of the input matrix A is referenced +# +# [α aᵀ] = [1 0 ] [δ 0 ] [1 uᵀ] = [δ δ*uᵀ ] +# [ A₋] [u̅ U₋'] [0 D₋] [0 U₋] [u̅*δ u̅*δ*uᵀ + U₋'*D₋*U₋] +# +# so +# +# δ = α +# uᵀ = δ \ aᵀ +# +# followed by the recursion on the updated lower block +# +# U₋'*D₋*U₋ = A₋ - u̅*δ*uᵀ +function _ldlt_upper!(A::StridedMatrix) + n = size(A, 2) + δ = A[1, 1] + for j in 2:n + δlⱼᶜ = A[1, j] + A[1, j] = δlⱼᶜ / δ + for i in 2:j + A[i, j] -= A[1, i]' * δlⱼᶜ + end + end + if n > 2 + _ldlt_upper!(view(A, 2:n, 2:n)) + end + return A +end + +# When the lower triangle contains the input it is actually LDLt +# Only the lower triangle of the input matrix A is referenced +# +# [A₁₁ ] = [L₁₁ 0 ] [D₁ 0 ] [L₁₁' L₂₁'] = [L₁₁*D₁*L₁₁' L₁₁*D₁*L₂₁' ] +# [A₂₁ A₂₂] [L₂₁ L₂₂] [0 D₂] [0 L₂₂'] [L₂₁*D₁*L₁₁' L₂₁*D₁*L₂₁' + L₂₂*D₂*L₂₂'] +# +# so +# +# L₁₁*D₁*L₁₁' = A₁₁ +# L₂₁ = (A₂₁/L₁₁')/D₁ +# +# and the recursion on the updated lower block +# +# L₂₂*D₂*L₂₂' = A₂₂ - L₂₁*D₁*L₂₁' +function _ldlt_lower_blocked!(A::StridedMatrix{T}, blocksize::Int, workspace::Vector{T} = Array{T}(undef, blocksize)) where T + n = size(A, 2) + if blocksize < n + A₁₁ = view(A, 1:blocksize, 1:blocksize) + _ldlt_lower!(A₁₁) + d₁ = view(A₁₁, diagind(A₁₁)) + rdiv!(view(A, (blocksize + 1):n, 1:blocksize), UnitLowerTriangular(A₁₁)') + rdiv!(view(A, (blocksize + 1):n, 1:blocksize), Diagonal(d₁)) + @inbounds for j in (blocksize + 1):n + workspace .= d₁ .* conj.(view(A, j, 1:blocksize)) + for i in j:n + for k in 1:blocksize + A[i, j] -= A[i, k] * workspace[k] + end + end + end + _ldlt_lower_blocked!(view(A, (blocksize + 1):n, (blocksize + 1):n), blocksize, workspace) + return A + else + # If the input matrix is smaller than the chosen block size then + # we just use the unblocked versions + return _ldlt_lower!(A) + end +end + +# When the upper triangle contains the input the factorization is actially UcDU +# Only the upper triangle of the input matrix A is referenced +# +# [A₁₁ A₁₂] = [U₁₁' U₁₂'] [D₁ 0 ] [U₁₁ U₁₂] = [U₁₁'*D₁*U₁₁ U₁₁'*D₁*U₁₂ ] +# [ A₂₂] [0. U₂₂'] [0 D₂] [0 U₂₂] [U₁₂'*D₁*U₁₁ U₁₂'*D₁*U₁₂ + U₂₂'*D₂*U₂₂] +# +# so +# +# U₁₁'*D₁*U₁₁ = A₁₁ +# U₁₂ = D₁ \ (U₁₁' \ A₁₂) +# +# and the recursion on the updated lower block +# +# U₂₂' * D₂ * U₂₂ = A₂₂ - U₁₂' * D₁ * U₁₂ +function _ldlt_upper_blocked!(A::StridedMatrix{T}, blocksize::Int, workspace::Vector{T} = Array{T}(undef, blocksize)) where T + n = size(A, 2) + if blocksize < n + A₁₁ = view(A, 1:blocksize, 1:blocksize) + _ldlt_upper!(A₁₁) + d₁ = view(A₁₁, diagind(A₁₁)) + U₁₂ = view(A, 1:blocksize, (blocksize + 1):n) + ldiv!(UnitUpperTriangular(A₁₁)', U₁₂) + ldiv!(Diagonal(d₁), U₁₂) + @inbounds for j in (blocksize + 1):n + workspace .= d₁ .* view(A, 1:blocksize, j) + for i in (blocksize + 1):j + for k in 1:blocksize + A[i, j] -= A[k, i]' * workspace[k] + end + end + end + _ldlt_upper_blocked!(view(A, (blocksize + 1):n, (blocksize + 1):n), blocksize, workspace) + return A + else + # If the input matrix is smaller than the chosen block size then + # we just use the unblocked versions + return _ldlt_upper!(A) + end +end + +""" + ldlt!(A::Hermitian)::LTLt + +See [`ldlt`](@ref) +""" +function LinearAlgebra.ldlt!(A::Hermitian{T}, blocksize::Int = 32 ÷ sizeof(T)) where T + if A.uplo === 'U' + _ldlt_upper_blocked!(A.data, blocksize) + else + _ldlt_lower_blocked!(A.data, blocksize) + end + return LDLt(A) +end + +""" + ldlt(A::Hermitian)::LTLt + +A Hermitian LDL factorization of `A` such that `A = L*D*L'` if `A.uplo == 'L'` +and `A = U'*D*U` if `A.uplo == 'U'. Hence, the `t` is a bit of a misnomer, +but the name was introduced for real symmetric matrices where there is +no difference between the two. + +Only the elements specified by `uplo` in the `Hermitian` input will be +referenced. + +The factorization has three properties: `d`, `D`, and `L` which is respectively +a vector of the diagonal elements of `D`, the `Diagonal` matrix `D` and the `L` +matrix when `A.uplo == 'L'` or the `adjoint` of the `U` matrix when `A.uplo == 'U'`. + +The factorization is blocked. Currently, the blocking size is set to `32 ÷ sizeof(eltype(A))` +based on very rudimentary benchmarking on my laptop. + +# Examples +```jldoctest +julia> ldlt(Hermitian([1 1; 1 -1])) +LDLt{Int64, Hermitian{Int64, Matrix{Int64}}} +L factor: +2×2 UnitLowerTriangular{Int64, Adjoint{Int64, Matrix{Int64}}}: + 1 ⋅ + 1 1 +D factor: +2×2 Diagonal{Int64, SubArray{Int64, 1, Base.ReshapedArray{Int64, 1, Hermitian{Int64, Matrix{Int64}}, Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}}, Tuple{StepRange{Int64, Int64}}, false}}: + 1 ⋅ + ⋅ -2 + +julia> ldlt(Hermitian([1 1; 1 1], :L)) +LDLt{Int64, Hermitian{Int64, Matrix{Int64}}} +L factor: +2×2 UnitLowerTriangular{Int64, Matrix{Int64}}: + 1 ⋅ + 1 1 +D factor: +2×2 Diagonal{Int64, SubArray{Int64, 1, Base.ReshapedArray{Int64, 1, Hermitian{Int64, Matrix{Int64}}, Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}}, Tuple{StepRange{Int64, Int64}}, false}}: + 1 ⋅ + ⋅ 0 +``` +""" +LinearAlgebra.ldlt(A::Hermitian{T}, blocksize::Int = 32 ÷ sizeof(T)) where T = ldlt!(LinearAlgebra.copy_oftype(A, typeof(oneunit(T) / one(T)))) + +function Base.getproperty(F::LDLt{<:Any, <:Hermitian}, d::Symbol) + Fdata = getfield(F, :data) + if d === :d + return view(Fdata, diagind(Fdata)) + elseif d === :D + return Diagonal(F.d) + elseif d === :L + if Fdata.uplo === 'L' + return UnitLowerTriangular(Fdata.data) + else + return UnitUpperTriangular(Fdata.data)' + end + else + return getfield(F, d) + end +end + +Base.copy(F::LDLt) = LDLt(copy(F.data)) + +function LinearAlgebra.ldiv!(F::LDLt{<:Any, <:Hermitian}, X::StridedVecOrMat) + L = F.L + ldiv!(L', ldiv!(F.D, ldiv!(L, X))) +end +function LinearAlgebra.lmul!(F::LDLt{<:Any, <:Hermitian}, X::StridedVecOrMat) + L = F.L + lmul!(L, lmul!(F.D, lmul!(L', X))) +end + +(*)(F::LDLt, X::AbstractVecOrMat) = lmul!(F, copy(X)) diff --git a/test/ldlt.jl b/test/ldlt.jl new file mode 100644 index 0000000..4f72f82 --- /dev/null +++ b/test/ldlt.jl @@ -0,0 +1,21 @@ +using Test, LinearAlgebra, Random, Quaternions + +Random.seed!(123) + +@testset "ldlt. n=$n, uplo=$uplo" for n in [5, 50, 500], uplo in [:U, :L] + A = [Quaternion(randn(4)...) for i in 1:n, j in 1:(n - 2)] + Brr = Hermitian(A * A', uplo) + o = ones(n) + + @testset "basics" begin + Frr = ldlt(Brr) + @test Frr.L*Frr.D*Frr.L' ≈ Brr + end + + @testset "solves" begin + Bfr = Brr + I + Ffr = ldlt(Bfr) + @test Bfr*(Ffr\o) ≈ o + @test Ffr*(Ffr\o) ≈ o + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 4de5615..e3c3df7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,7 @@ using Test # @testset "The LinearAlgebra Test Suite" begin include("juliaBLAS.jl") include("cholesky.jl") +include("ldlt.jl") include("qr.jl") include("eigenselfadjoint.jl") include("eigengeneral.jl")