diff --git a/src/GenericLinearAlgebra.jl b/src/GenericLinearAlgebra.jl
index c39aa32..0efd6a1 100644
--- a/src/GenericLinearAlgebra.jl
+++ b/src/GenericLinearAlgebra.jl
@@ -5,6 +5,7 @@ import LinearAlgebra: mul!, ldiv!
 include("juliaBLAS.jl")
 include("lapack.jl")
 include("cholesky.jl")
+include("ldlt.jl")
 include("householder.jl")
 include("qr.jl")
 include("eigenSelfAdjoint.jl")
diff --git a/src/juliaBLAS.jl b/src/juliaBLAS.jl
index cb40e8b..fb75d4b 100644
--- a/src/juliaBLAS.jl
+++ b/src/juliaBLAS.jl
@@ -112,9 +112,9 @@ function rankUpdate!(C::Hermitian, A::StridedVecOrMat, α::Real)
 end
 
 ### Generic fallbacks
-function lmul!(A::UpperTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Number,S}
+function lmul!(A::UpperTriangular{T,S}, B::StridedVecOrMat{T}, α::T) where {T<:Number,S}
     AA = A.data
-    m, n = size(B)
+    m, n = size(B, 1), size(B, 2)
     for i ∈ 1:m
         for j ∈ 1:n
             B[i, j] = α * AA[i, i] * B[i, j]
@@ -125,9 +125,9 @@ function lmul!(A::UpperTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Nu
     end
     return B
 end
-function lmul!(A::LowerTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Number,S}
+function lmul!(A::LowerTriangular{T,S}, B::StridedVecOrMat{T}, α::T) where {T<:Number,S}
     AA = A.data
-    m, n = size(B)
+    m, n = size(B, 1), size(B, 2)
     for i ∈ m:-1:1
         for j ∈ 1:n
             B[i, j] = α * AA[i, i] * B[i, j]
@@ -138,9 +138,9 @@ function lmul!(A::LowerTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Nu
     end
     return B
 end
-function lmul!(A::UnitUpperTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Number,S}
+function lmul!(A::UnitUpperTriangular{T,S}, B::StridedVecOrMat{T}, α::T) where {T<:Number,S}
     AA = A.data
-    m, n = size(B)
+    m, n = size(B, 1), size(B, 2)
     for i ∈ 1:m
         for j ∈ 1:n
             B[i, j] = α * B[i, j]
@@ -151,9 +151,9 @@ function lmul!(A::UnitUpperTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T
     end
     return B
 end
-function lmul!(A::UnitLowerTriangular{T,S}, B::StridedMatrix{T}, α::T) where {T<:Number,S}
+function lmul!(A::UnitLowerTriangular{T,S}, B::StridedVecOrMat{T}, α::T) where {T<:Number,S}
     AA = A.data
-    m, n = size(B)
+    m, n = size(B, 1), size(B, 2)
     for i ∈ m:-1:1
         for j ∈ 1:n
             B[i, j] = α * B[i, j]
diff --git a/src/ldlt.jl b/src/ldlt.jl
new file mode 100644
index 0000000..a5622a8
--- /dev/null
+++ b/src/ldlt.jl
@@ -0,0 +1,232 @@
+import Base: *
+
+# When the lower triangle contains the input it is actually LDLt
+# Only the lower triangle of the input matrix A is referenced
+#
+# [α   ] = [1 0 ] [δ 0 ] [1 l' ] = [δ         δ*l'        ]
+# [a A₋]   [l L₋] [0 D₋] [0 L₋']   [l*δ l*δ*l' + L₋*D₋*L₋']
+#
+# so
+#
+# δ = α
+# l = δ \ a
+#
+# followed by the recursion on the updated lower block
+#
+# L₋'*D₋*L₋' = A₋ - l*δ*l'
+function _ldlt_lower!(A::StridedMatrix)
+    n = size(A, 2)
+    δ = A[1, 1]
+    for i in 2:n
+        lᵢ = A[i, 1] / δ
+        A[i, 1] = lᵢ
+    end
+    for j in 2:n
+        lⱼδ = A[j, 1] * δ
+        for i in j:n
+            A[i, j] -= A[i, 1] * lⱼδ'
+        end
+    end
+    if n > 2
+        _ldlt_lower!(view(A, 2:n, 2:n))
+    end
+    return A
+end
+
+# When the upper triangle contains the input the factorization is actially UcDU
+# Only the upper triangle of the input matrix A is referenced
+#
+# [α aᵀ] = [1 0  ] [δ 0 ] [1 uᵀ] = [δ         δ*uᵀ     ]
+# [  A₋]   [u̅ U₋'] [0 D₋] [0 U₋]   [u̅*δ u̅*δ*uᵀ + U₋'*D₋*U₋]
+#
+# so
+#
+# δ = α
+# uᵀ = δ \ aᵀ
+#
+# followed by the recursion on the updated lower block
+#
+# U₋'*D₋*U₋ = A₋ - u̅*δ*uᵀ
+function _ldlt_upper!(A::StridedMatrix)
+    n = size(A, 2)
+    δ = A[1, 1]
+    for j in 2:n
+        δlⱼᶜ = A[1, j]
+        A[1, j] = δlⱼᶜ / δ
+        for i in 2:j
+            A[i, j] -= A[1, i]' * δlⱼᶜ
+        end
+    end
+    if n > 2
+        _ldlt_upper!(view(A, 2:n, 2:n))
+    end
+    return A
+end
+
+# When the lower triangle contains the input it is actually LDLt
+# Only the lower triangle of the input matrix A is referenced
+#
+# [A₁₁    ] = [L₁₁  0 ] [D₁ 0 ] [L₁₁' L₂₁'] = [L₁₁*D₁*L₁₁'        L₁₁*D₁*L₂₁'       ]
+# [A₂₁ A₂₂]   [L₂₁ L₂₂] [0  D₂] [0    L₂₂']   [L₂₁*D₁*L₁₁' L₂₁*D₁*L₂₁' + L₂₂*D₂*L₂₂']
+#
+# so
+#
+# L₁₁*D₁*L₁₁' = A₁₁
+# L₂₁ = (A₂₁/L₁₁')/D₁
+#
+# and the recursion on the updated lower block
+#
+# L₂₂*D₂*L₂₂' = A₂₂ - L₂₁*D₁*L₂₁'
+function _ldlt_lower_blocked!(A::StridedMatrix{T}, blocksize::Int, workspace::Vector{T} = Array{T}(undef, blocksize)) where T
+    n = size(A, 2)
+    if blocksize < n
+        A₁₁ = view(A, 1:blocksize, 1:blocksize)
+        _ldlt_lower!(A₁₁)
+        d₁ = view(A₁₁, diagind(A₁₁))
+        rdiv!(view(A, (blocksize + 1):n, 1:blocksize), UnitLowerTriangular(A₁₁)')
+        rdiv!(view(A, (blocksize + 1):n, 1:blocksize), Diagonal(d₁))
+        @inbounds for j in (blocksize + 1):n
+            workspace .= d₁ .* conj.(view(A, j, 1:blocksize))
+            for i in j:n
+                for k in 1:blocksize
+                    A[i, j] -= A[i, k] * workspace[k]
+                end
+            end
+        end
+        _ldlt_lower_blocked!(view(A, (blocksize + 1):n, (blocksize + 1):n), blocksize, workspace)
+        return A
+    else
+        # If the input matrix is smaller than the chosen block size then
+        # we just use the unblocked versions
+        return _ldlt_lower!(A)
+    end
+end
+
+# When the upper triangle contains the input the factorization is actially UcDU
+# Only the upper triangle of the input matrix A is referenced
+#
+# [A₁₁ A₁₂] = [U₁₁' U₁₂'] [D₁ 0 ] [U₁₁ U₁₂] = [U₁₁'*D₁*U₁₁        U₁₁'*D₁*U₁₂       ]
+# [    A₂₂]   [0.   U₂₂'] [0  D₂] [0   U₂₂]   [U₁₂'*D₁*U₁₁ U₁₂'*D₁*U₁₂ + U₂₂'*D₂*U₂₂]
+#
+# so
+#
+# U₁₁'*D₁*U₁₁ = A₁₁
+# U₁₂ = D₁ \ (U₁₁' \ A₁₂)
+#
+# and the recursion on the updated lower block
+#
+# U₂₂' * D₂ * U₂₂ = A₂₂ - U₁₂' * D₁ * U₁₂
+function _ldlt_upper_blocked!(A::StridedMatrix{T}, blocksize::Int, workspace::Vector{T} = Array{T}(undef, blocksize)) where T
+    n = size(A, 2)
+    if blocksize < n
+        A₁₁ = view(A, 1:blocksize, 1:blocksize)
+        _ldlt_upper!(A₁₁)
+        d₁ = view(A₁₁, diagind(A₁₁))
+        U₁₂ = view(A, 1:blocksize, (blocksize + 1):n)
+        ldiv!(UnitUpperTriangular(A₁₁)', U₁₂)
+        ldiv!(Diagonal(d₁), U₁₂)
+        @inbounds for j in (blocksize + 1):n
+            workspace .= d₁ .* view(A, 1:blocksize, j)
+            for i in (blocksize + 1):j
+                for k in 1:blocksize
+                    A[i, j] -= A[k, i]' * workspace[k]
+                end
+            end
+        end
+        _ldlt_upper_blocked!(view(A, (blocksize + 1):n, (blocksize + 1):n), blocksize, workspace)
+        return A
+    else
+        # If the input matrix is smaller than the chosen block size then
+        # we just use the unblocked versions
+        return _ldlt_upper!(A)
+    end
+end
+
+"""
+    ldlt!(A::Hermitian)::LTLt
+
+See [`ldlt`](@ref)
+"""
+function LinearAlgebra.ldlt!(A::Hermitian{T}, blocksize::Int = 32 ÷ sizeof(T)) where T
+    if A.uplo === 'U'
+        _ldlt_upper_blocked!(A.data, blocksize)
+    else
+        _ldlt_lower_blocked!(A.data, blocksize)
+    end
+    return LDLt(A)
+end
+
+"""
+    ldlt(A::Hermitian)::LTLt
+
+A Hermitian LDL factorization of `A` such that `A = L*D*L'` if `A.uplo == 'L'`
+and `A = U'*D*U` if `A.uplo == 'U'. Hence, the `t` is a bit of a misnomer,
+but the name was introduced for real symmetric matrices where there is
+no difference between the two.
+
+Only the elements specified by `uplo` in the `Hermitian` input will be
+referenced.
+
+The factorization has three properties: `d`, `D`, and `L` which is respectively
+a vector of the diagonal elements of `D`, the `Diagonal` matrix `D` and the `L`
+matrix when `A.uplo == 'L'` or the `adjoint` of the `U` matrix when `A.uplo == 'U'`.
+
+The factorization is blocked. Currently, the blocking size is set to `32 ÷ sizeof(eltype(A))`
+based on very rudimentary benchmarking on my laptop.
+
+# Examples
+```jldoctest
+julia> ldlt(Hermitian([1 1; 1 -1]))
+LDLt{Int64, Hermitian{Int64, Matrix{Int64}}}
+L factor:
+2×2 UnitLowerTriangular{Int64, Adjoint{Int64, Matrix{Int64}}}:
+ 1  ⋅
+ 1  1
+D factor:
+2×2 Diagonal{Int64, SubArray{Int64, 1, Base.ReshapedArray{Int64, 1, Hermitian{Int64, Matrix{Int64}}, Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}}, Tuple{StepRange{Int64, Int64}}, false}}:
+ 1   ⋅
+ ⋅  -2
+
+julia> ldlt(Hermitian([1 1; 1 1], :L))
+LDLt{Int64, Hermitian{Int64, Matrix{Int64}}}
+L factor:
+2×2 UnitLowerTriangular{Int64, Matrix{Int64}}:
+ 1  ⋅
+ 1  1
+D factor:
+2×2 Diagonal{Int64, SubArray{Int64, 1, Base.ReshapedArray{Int64, 1, Hermitian{Int64, Matrix{Int64}}, Tuple{Base.MultiplicativeInverses.SignedMultiplicativeInverse{Int64}}}, Tuple{StepRange{Int64, Int64}}, false}}:
+ 1  ⋅
+ ⋅  0
+```
+"""
+LinearAlgebra.ldlt(A::Hermitian{T}, blocksize::Int = 32 ÷ sizeof(T)) where T = ldlt!(LinearAlgebra.copy_oftype(A, typeof(oneunit(T) / one(T))))
+
+function Base.getproperty(F::LDLt{<:Any, <:Hermitian}, d::Symbol)
+    Fdata = getfield(F, :data)
+    if d === :d
+        return view(Fdata, diagind(Fdata))
+    elseif d === :D
+        return Diagonal(F.d)
+    elseif d === :L
+        if Fdata.uplo === 'L'
+            return UnitLowerTriangular(Fdata.data)
+        else
+            return UnitUpperTriangular(Fdata.data)'
+        end
+    else
+        return getfield(F, d)
+    end
+end
+
+Base.copy(F::LDLt) = LDLt(copy(F.data))
+
+function LinearAlgebra.ldiv!(F::LDLt{<:Any, <:Hermitian}, X::StridedVecOrMat)
+    L = F.L
+    ldiv!(L', ldiv!(F.D, ldiv!(L, X)))
+end
+function LinearAlgebra.lmul!(F::LDLt{<:Any, <:Hermitian}, X::StridedVecOrMat)
+    L = F.L
+    lmul!(L, lmul!(F.D, lmul!(L', X)))
+end
+
+(*)(F::LDLt, X::AbstractVecOrMat) = lmul!(F, copy(X))
diff --git a/test/ldlt.jl b/test/ldlt.jl
new file mode 100644
index 0000000..4f72f82
--- /dev/null
+++ b/test/ldlt.jl
@@ -0,0 +1,21 @@
+using Test, LinearAlgebra, Random, Quaternions
+
+Random.seed!(123)
+
+@testset "ldlt. n=$n, uplo=$uplo" for n in [5, 50, 500], uplo in [:U, :L]
+    A = [Quaternion(randn(4)...) for i in 1:n, j in 1:(n - 2)]
+    Brr = Hermitian(A * A', uplo)
+    o = ones(n)
+
+    @testset "basics" begin
+        Frr = ldlt(Brr)
+        @test Frr.L*Frr.D*Frr.L' ≈ Brr
+    end
+
+    @testset "solves" begin
+        Bfr = Brr + I
+        Ffr = ldlt(Bfr)
+        @test Bfr*(Ffr\o) ≈ o
+        @test Ffr*(Ffr\o) ≈ o
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 4de5615..e3c3df7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,6 +3,7 @@ using Test
 # @testset "The LinearAlgebra Test Suite" begin
 include("juliaBLAS.jl")
 include("cholesky.jl")
+include("ldlt.jl")
 include("qr.jl")
 include("eigenselfadjoint.jl")
 include("eigengeneral.jl")