Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4f2a272
README: link to most recent Intel MKL docs
alyst Jan 3, 2025
0b23c3e
add top-level comments to clarify code structure
Dec 2, 2024
98f4a99
tests: ntries constant
alyst Jan 3, 2025
20ea8b9
tests: rework random matrices generation
alyst Jan 3, 2025
588bc01
tests: fix matdescra
alyst Jan 3, 2025
090d82a
tests: increase atol to reduce spurious failures
alyst Jan 3, 2025
08c64c5
check_map_op_sizes(): allow C=nothing
alyst Jan 3, 2025
6d502ad
check_map_op_sizes(): allow disabling specific checks
Sep 10, 2024
bd6fcae
matrix_descr(): edit specific fields
Sep 10, 2024
7e33a1b
use LazyString for exceptions
Sep 13, 2024
881edfa
rename typealias MKLSparseMat to SparseMat
Sep 13, 2024
611b663
tweak typealiases to improve precompilation times
alyst Jan 3, 2025
172c706
fix \ support in v1.9-v1.11
alyst Jan 3, 2025
8170704
add check_nzpattern() method
alyst Jan 3, 2025
9b3045e
convert(CSC/CSR, MKLSparseMtx): fix for empty mtx
alyst Jan 3, 2025
3dcebba
COO, CSR: overloads necessary for unit tests
alyst Jan 3, 2025
686b055
copy!(CSC/CSR, MKLSparseMatrix)
alyst Jan 3, 2025
c96f756
CSR/COO: improve dense conversion
alyst Jan 3, 2025
473d683
convert(CSR, a::CSC)
alyst Jan 3, 2025
90c05d2
add fastcopytri!() method
alyst Jan 3, 2025
ebab206
dual_opcode(op)
alyst Jan 3, 2025
d6cdeef
mul!(dense, sparse, sparse) support (sp2md!())
alyst Jan 3, 2025
1589ff1
mul!(sparse, sparse, sparse) support (sp2m())
alyst Jan 3, 2025
3dd1015
dense := A * A^T support (syrkd!())
alyst Jan 3, 2025
a8e9303
sparse := A * A^T support (syrk())
alyst Jan 3, 2025
8a3e231
dense := A * B * A^T support (syprd!())
alyst Jan 3, 2025
e878bcd
spmm() & spmmd!() (untested)
alyst Jan 3, 2025
4a7c463
fixup ws
Dec 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

[![codecov](https://codecov.io/gh/JuliaSparse/MKLSparse.jl/graph/badge.svg?token=j3KoKBEIt1)](https://codecov.io/gh/JuliaSparse/MKLSparse.jl)

`MKLSparse.jl` is a Julia package to seamlessly use the sparse functionality in MKL to speed up operations on sparse arrays in Julia.
In order to use `MKLSparse.jl` you do not need to install Intel's MKL library nor build Julia with MKL. `MKLSparse.jl` will automatically download and use the MKL library for you when installed.
*MKLSparse.jl* is a Julia package to seamlessly use the [sparse BLAS routines from Intel's Math Kernel Library (MKL)](https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c)
to speed up operations on sparse arrays in Julia.
In order to use *MKLSparse.jl* you do not need to install Intel's MKL library nor build Julia with MKL. *MKLSparse.jl* will automatically download and use the MKL library for you when installed.

### Matrix multiplications

Expand Down
2 changes: 1 addition & 1 deletion gen/wrapper.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function wrapper(name::String, headers::Vector{String}, optimized::Bool=false)

args = get_default_args()
push!(args, "-I$include_dir")

ctx = create_context(headers, args, options)
build!(ctx)

Expand Down
5 changes: 5 additions & 0 deletions src/MKLSparse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ function _log_mklsparse_call(fname)
__mklsparse_calls_count[] += 1
end

# common MKL definitions from mkl_service.h, see also MKL.jl

@enum Threading begin
THREADING_INTEL
THREADING_SEQUENTIAL
Expand All @@ -39,13 +41,16 @@ function set_interface_layer(interface::Interface = INTERFACE_LP64)
return nothing
end

# initialize the MKL interface upon module initialization
# NOTE: this sets the interface for all MKL API calls, not just the sparse ones
function __init__()
set_interface_layer(INTERFACE_LP64)
end

# Wrappers generated by Clang.jl
include("libmklsparse.jl")
include("types.jl")
include("utils.jl")
include("mklsparsematrix.jl")

# TODO: BLAS1
Expand Down
249 changes: 249 additions & 0 deletions src/generic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,255 @@ function mm!(transA::Char, alpha::T, A::AbstractSparseMatrix{T}, descr::matrix_d
return C
end

# C := op(A) * B, where C is sparse
function spmm(transA::Char, A::S, B::S) where {S <: AbstractSparseMatrix}
check_trans(transA)
check_mat_op_sizes(nothing, A, transA, B, 'N')
Cout = Ref{sparse_matrix_t}()
hA = MKLSparseMatrix(A)
hB = MKLSparseMatrix(B)
res = mkl_call(Val{:mkl_sparse_spmmI}(), S,
transA, hA, hB, Cout)
destroy(hA)
destroy(hB)
check_status(res)
# NOTE: we are guessing what is the storage format of C
hC = MKLSparseMatrix{S}(Cout[])
C = convert(S, hC)
destroy(hC)
return C
end

# C := op(A) * B, where C is dense
function spmmd!(transa::Char, A::S, B::S,
C::StridedMatrix{T};
dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR
) where {S <: AbstractSparseMatrix{T}} where T
check_trans(transa)
check_mat_op_sizes(C, A, transa, B, 'N')
ldC = stride(C, 2)
hA = MKLSparseMatrix(A)
hB = MKLSparseMatrix(B)
res = mkl_call(Val{:mkl_sparse_T_spmmdI}(), S,
transa, hA, hB, dense_layout, C, ldC)
destroy(hA)
destroy(hB)
check_status(res)
return C
end

# C := opA(A) * opB(B), where C is sparse
function sp2m(transA::Char, A::S, descrA::matrix_descr,
transB::Char, B::S, descrB::matrix_descr
) where S <: AbstractSparseMatrix
check_trans(transA)
check_trans(transB)
check_mat_op_sizes(nothing, A, transA, B, transB)
Cout = Ref{sparse_matrix_t}()
hA = MKLSparseMatrix(A)
hB = MKLSparseMatrix(B)
res = mkl_call(Val{:mkl_sparse_sp2mI}(), S,
transA, descrA, hA, transB, descrB, hB,
SPARSE_STAGE_FULL_MULT, Cout)
destroy(hA)
destroy(hB)
check_status(res)
# NOTE: we are guessing what is the storage format of C
hC = MKLSparseMatrix{S}(Cout[])
C = convert(S, hC)
destroy(hC)
return C
end

# C := opA(A) * opB(B), where C is sparse, in-place version
# C should have the correct size and sparsity pattern
function sp2m!(transA::Char, A::S, descrA::matrix_descr,
transB::Char, B::S, descrB::matrix_descr,
C::S;
check_nzpattern::Bool = true
) where {S <: AbstractSparseMatrix}
check_trans(transA)
check_trans(transB)
check_mat_op_sizes(C, A, transA, B, transB)
hA = MKLSparseMatrix(A)
hB = MKLSparseMatrix(B)
if check_nzpattern
# pre-multiply A * B to get the number of nonzeros per column in the result
CptnOut = Ref{sparse_matrix_t}()
res = mkl_call(Val{:mkl_sparse_sp2mI}(), S,
transA, descrA, hA, transB, descrB, hB,
SPARSE_STAGE_NNZ_COUNT, CptnOut)
check_status(res)
hCptn = MKLSparseMatrix{S}(CptnOut[])
try
# check if C has the same per-column nonzeros as the result
MKLSparse.check_nzpattern(C, hCptn)
catch e
# destroy handles to A and B if the pattern check fails,
# otherwise reuse them at the actual multiplication
destroy(hA)
destroy(hB)
rethrow(e)
finally
destroy(hCptn)
end
# FIXME rowval not checked
end
# FIXME the optimal way would be to create the MKLSparse handle to C reusing its arrays
# and do SPARSE_STAGE_FINALIZE_MULT to directly write to the C.nzval
# but that causes segfaults when the handle is destroyed
# (also the partial mkl_sparse_copy(C) workaround to reuse the nz structure segfaults)
# see the note stating that external memory is not currently supported:
# https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2024-2/two-stage-algorithm-in-inspect-execute-sp-blas.html
#hC = MKLSparseMatrix(C)
#mkl_call(Val{:mkl_sparse_set_memory_hintI}(), typeof(C), SPARSE_MEMORY_NONE)
#hC_ref = Ref(hC)
#res = mkl_call(Val{:mkl_sparse_sp2mI}(), typeof(A),
# transA, descrA, hA, transB, descrB, hB,
# SPARSE_STAGE_FINALIZE_MULT, hC_ref)
#@assert hC_ref[] == hC
# so instead we do the full multiplication and copy the result into C nzvals
hCopy_ref = Ref{sparse_matrix_t}()
# don't log if it's the 2nd mkl_call
res = mkl_call(Val{:mkl_sparse_sp2mI}(), S,
transA, descrA, hA, transB, descrB, hB,
SPARSE_STAGE_FULL_MULT, hCopy_ref, log = Val(!check_nzpattern))
destroy(hA)
destroy(hB)
#destroy(hC)
check_status(res)
if hCopy_ref[] != C_NULL
hCopy = MKLSparseMatrix{S}(hCopy_ref[])
copy!(C, hCopy; check_nzpattern)
destroy(hCopy)
end
return C
end

# C := alpha * opA(A) * opB(B) + beta * C, where C is dense
function sp2md!(transA::Char, alpha::T, A::S, descrA::matrix_descr,
transB::Char, B::S, descrB::matrix_descr,
beta::T, C::StridedMatrix{T};
dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR
) where {S <: AbstractSparseMatrix{T}} where T
check_trans(transA)
check_trans(transB)
check_mat_op_sizes(C, A, transA, B, transB)
ldC = stride(C, 2)
hA = MKLSparseMatrix(A)
hB = MKLSparseMatrix(B)
res = mkl_call(Val{:mkl_sparse_T_sp2mdI}(), S,
transA, descrA, hA, transB, descrB, hB,
alpha, beta,
C, dense_layout, ldC)
destroy(hA)
destroy(hB)
check_status(res)
return C
end

# C := A * op(A), or
# C := op(A) * A, where C is sparse
# note: only the upper triangular part of C is computed
function syrk(transA::Char, A::SparseMatrixCSR; copytri::Bool = false)
copytri && error("syrk() wrapper does not implement copytri=true")
check_trans(transA)
Cout = Ref{sparse_matrix_t}()
hA = MKLSparseMatrix(A)
res = mkl_call(Val{:mkl_sparse_syrkI}(), typeof(A),
transA, hA, Cout)
destroy(hA)
check_status(res)
# NOTE: we are guessing what is the storage format of C
hC = MKLSparseMatrix{typeof(A)}(Cout[])
C = convert(typeof(A), hC)
destroy(hC)
return C
end

# CSC is not supported by SparseMKL directly, so treat A as Aᵀ in CSR format
# note: only the lower triangular part of C is computed (lower CSC = upper CSR)
function syrk(transA::Char, A::SparseMatrixCSC{T}; kwargs...) where T
C = syrk(dual_opcode(T, transA),
convert(SparseMatrixCSR, transpose(A)); kwargs...)
return convert(typeof(A), transpose(C))
end

# C := A * op(A), or
# C := op(A) * A, where C is dense
# note: only the upper triangular part of C is computed
function syrkd!(transA::Char, alpha::T, A::SparseMatrixCSR{T}, beta::T,
C::StridedMatrix{T};
dense_layout::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR,
copytri::Bool = true
) where T
check_trans(transA)
check_mat_op_sizes(C, A, transA, A, transA == 'N' ? 'T' : 'N'; dense_layout)
ldC = stride(C, 2)
hA = MKLSparseMatrix(A)
res = mkl_call(Val{:mkl_sparse_T_syrkdI}(), typeof(A),
transA, hA, alpha, beta, C, dense_layout, ldC)
destroy(hA)
check_status(res)
copytri && fastcopytri!(C, dense_layout == SPARSE_LAYOUT_COLUMN_MAJOR ? 'U' : 'L',
T <: Complex)
return C
end

# CSC is not supported by SparseMKL directly, so treat A as Aᵀ in CSR format
function syrkd!(transA::Char, alpha::T, A::SparseMatrixCSC{T}, beta::T,
C::StridedMatrix{T}; kwarg...
) where T
# since CSC support is implemented by transposing A, the A*A' has to be conjugated
# to be correct in the complex case, that produces incorrect results when beta != 0
(T <: Complex) && error("syrkd!() wrapper does not support SparseMatrixCSC with complex values")
syrkd!(dual_opcode(T, transA), alpha,
convert(SparseMatrixCSR, transpose(A)), beta, C; kwarg...)
end

# C := alpha * op(A) * B * A + beta * C, or
# C := alpha * A * B * op(A) + beta * C, where C is dense
# note: only the upper triangular part of C is computed
function syprd!(transA::Char, alpha::T, A::SparseMatrixCSR{T},
B::StridedMatrix{T}, beta::T, C::StridedMatrix{T};
dense_layout_B::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR,
dense_layout_C::sparse_layout_t = SPARSE_LAYOUT_COLUMN_MAJOR,
copytri::Bool = true
) where T
check_trans(transA)
# FIXME dense_layout_B not used
check_mat_op_sizes(C, A, transA, B, 'N';
check_result_columns = false, dense_layout = dense_layout_C)
check_mat_op_sizes(C, B, 'N', A, transA == 'N' ? 'T' : 'N';
check_result_rows = false, dense_layout = dense_layout_C)
ldB = stride(B, 2)
ldC = stride(C, 2)
hA = MKLSparseMatrix(A)
res = mkl_call(Val{:mkl_sparse_T_syprdI}(), typeof(A),
transA, hA, B, dense_layout_B, ldB,
alpha, beta, C, dense_layout_C, ldC)
destroy(hA)
check_status(res)
copytri && fastcopytri!(C, dense_layout_C == SPARSE_LAYOUT_COLUMN_MAJOR ? 'U' : 'L',
T <: Complex)
return C
end

function syprd!(transA::Char, alpha::T, A::SparseMatrixCSC{T},
B::StridedMatrix{T}, beta::T, C::StridedMatrix{T};
kwargs...
) where T
# since CSC support is implemented by transposing A, the A*A' has to be conjugated
# to be correct in the complex case, that produces incorrect results when beta != 0
(T <: Complex) && error("syprd!() wrapper does not support SparseMatrixCSC with complex values")

syprd!(
dual_opcode(T, transA), alpha,
convert(SparseMatrixCSR, transpose(A)),
B, beta, C; kwargs...
)
end

# find y: op(A) * y = alpha * x
function trsv!(transA::Char, alpha::T, A::AbstractSparseMatrix{T}, descr::matrix_descr,
x::StridedVector{T}, y::StridedVector{T}
Expand Down
Loading