
sparse cuda arrays support #136

Closed · wants to merge 6 commits

55 changes: 36 additions & 19 deletions perf/perf.jl
@@ -1,17 +1,30 @@
using Flux, GraphNeuralNetworks, Graphs, BenchmarkTools, CUDA
using DataFrames, Statistics, JLD2, SparseArrays
CUDA.device!(2)
using Unitful
# CUDA.device!(2)
CUDA.allowscalar(false)

BenchmarkTools.ratio(::Missing, x) = Inf
BenchmarkTools.ratio(x, ::Missing) = 0.0
BenchmarkTools.ratio(::Missing, ::Missing) = missing
function getres(res, str)
ismissing(res[str]) && return missing
t = median(res[str]).time
if t < 1e3
t * u"ns"
elseif t < 1e6
t / 1e3 * u"μs"
elseif t < 1e9
t / 1e6 * u"ms"
else
t / 1e9 * u"s"
end
end
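
# A hedged usage sketch: `res` is the result dictionary produced by
# `run_single_benchmark` below (keys mirror its "CPU_FWD"/"GPU_FWD" strings):
#   res = Dict{String,Any}("CPU_FWD" => @benchmark sum(rand(100)))
#   getres(res, "CPU_FWD")      # median time as a Unitful quantity, e.g. 123.4 ns
#   res["GPU_FWD"] = missing    # a skipped benchmark stays `missing`
#   getres(res, "GPU_FWD")      # missing propagates into the DataFrame columns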

function run_single_benchmark(N, c, D, CONV; gtype=:lg)
data = erdos_renyi(N, c / (N-1), seed=17)
X = randn(Float32, D, N)

g = GNNGraph(data; ndata=X, graph_type=gtype)

# g = rand_graph(N, c*N; ndata=X, graph_type=gtype)
g_gpu = g |> gpu

m = CONV(D => D)
@@ -58,11 +71,12 @@ function run_benchmarks(;
c = 6,
D = 100,
layers = [GCNConv, GATConv],
gtypes = [:coo, :sparse, :dense],
gtypes = [:coo],
)

df = DataFrame(N=Int[], c=Float64[], layer=String[], gtype=Symbol[],
time_cpu=Any[], time_gpu=Any[]) |> allowmissing
df = DataFrame(N=Int[], c=Int[], layer=String[], gtype=Symbol[],
time_fwd_cpu=Any[], time_fwd_gpu=Any[],
time_grad_cpu=Any[], time_grad_gpu=Any[])

for gtype in gtypes
for N in Ns
@@ -73,34 +87,37 @@
N = N,
c = c,
gtype = gtype,
time_cpu = ismissing(res["CPU"]) ? missing : median(res["CPU"]),
time_gpu = ismissing(res["GPU"]) ? missing : median(res["GPU"]),
time_fwd_cpu = getres(res, "CPU_FWD"),
time_fwd_gpu = getres(res, "GPU_FWD"),
time_grad_cpu = getres(res, "CPU_GRAD"),
time_grad_gpu = getres(res, "GPU_GRAD"),
)
push!(df, row)
println(row)
end
end
end

df.gpu_to_cpu = ratio.(df.time_gpu, df.time_cpu)
df.grad_gpu_to_cpu = NoUnits.(df.time_grad_gpu ./ df.time_grad_cpu)
sort!(df, [:layer, :N, :c, :gtype])
return df
end

# df = run_benchmarks()
# for g in groupby(df, :layer); println(g, "\n"); end
df = run_benchmarks()
for g in groupby(df, :layer); println(g, "\n"); end

# @save "perf/perf_master_20210803_carlo.jld2" dfmaster=df
# @save "master_2021_11_01_arrakis.jld2" dfmaster=df
## or
# @save "perf/perf_pr.jld2" dfpr=df
# @save "pr.jld2" dfpr=df


function compare(dfpr, dfmaster; on=[:N, :c, :gtype, :layer])
df = outerjoin(dfpr, dfmaster; on=on, makeunique=true, renamecols = :_pr => :_master)
df.pr_to_master_cpu = ratio.(df.time_cpu_pr, df.time_cpu_master)
df.pr_to_master_gpu = ratio.(df.time_gpu_pr, df.time_gpu_master)
df.pr_to_master_cpu = df.time_cpu_pr ./ df.time_cpu_master
df.pr_to_master_gpu = df.time_gpu_pr ./ df.time_gpu_master
return df[:,[:N, :c, :gtype, :layer, :pr_to_master_cpu, :pr_to_master_gpu]]
end

# @load "perf/perf_pr.jld2" dfpr
# @load "perf/perf_master.jld2" dfmaster
# compare(dfpr, dfmaster)
30 changes: 9 additions & 21 deletions src/GNNGraphs/convert.jl
@@ -81,20 +81,16 @@ to_dense(A::AbstractSparseMatrix, x...; kws...) = to_dense(collect(A), x...; kws...)

function to_dense(A::ADJMAT_T, T=nothing; dir=:out, num_nodes=nothing, weighted=true)
@assert dir ∈ [:out, :in]
T = T === nothing ? eltype(A) : T
num_nodes = size(A, 1)
@assert num_nodes == size(A, 2)
# @assert all(x -> (x == 1) || (x == 0), A)
num_edges = numnonzeros(A)
if dir == :in
A = A'
end
if T != eltype(A)
A = T.(A)
end
if !weighted
A = map(x -> ifelse(x > 0, T(1), T(0)), A)
A = binarize(A)
end
A = convert_eltype(T, A)
return A, num_nodes, num_edges
end
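
# A hedged sketch of the conversion semantics on plain CPU arrays:
#   A, n, m = to_dense([0 1; 1 0], Float32)                  # Matrix{Float32}, 2 nodes, 2 edges
#   B, _, _ = to_dense([0 2; 3 0], Float32; weighted=false)  # weights replaced by 0/1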

@@ -128,10 +124,7 @@ function to_dense(coo::COO_T, T=nothing; dir=:out, num_nodes=nothing, weighted=true)
if val === nothing || !weighted
val = ones_like(s, T)
end
if eltype(val) != T
val = T.(val)
end

val = convert_eltype(T, val)
idxs = s .+ n .* (t .- 1)

## using scatter instead of indexing since there could be multiple edges
@@ -149,20 +142,17 @@ function to_sparse(A::ADJMAT_T, T=nothing; dir=:out, num_nodes=nothing, weighted=true)
@assert dir ∈ [:out, :in]
num_nodes = size(A, 1)
@assert num_nodes == size(A, 2)
T = T === nothing ? eltype(A) : T
num_edges = A isa AbstractSparseMatrix ? nnz(A) : count(!=(0), A)
if dir == :in
A = A'
end
if T != eltype(A)
A = T.(A)
end
if !(A isa AbstractSparseMatrix)
A = sparse(A)
A = _sparse(A)
end
if !weighted
A = map(x -> ifelse(x > 0, T(1), T(0)), A)
A = binarize(A)
end
A = convert_eltype(T, A)
num_edges = nnz(A)
return A, num_nodes, num_edges
end

@@ -180,10 +170,8 @@ function to_sparse(coo::COO_T, T=nothing; dir=:out, num_nodes=nothing, weighted=true)
end

num_nodes::Int = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes
A = sparse(s, t, eweight, num_nodes, num_nodes)
A = _sparse(s, t, eweight, num_nodes, num_nodes)
num_edges::Int = nnz(A)
if eltype(A) != T
A = T.(A)
end
A = convert_eltype(T, A)
return A, num_nodes, num_edges
end
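
# A hedged sketch (`COO_T` is the (source, target, weight) triple used by
# GNNGraphs; the default `weighted=true` keeps the given weights):
#   s, t, w = [1, 2, 3], [2, 3, 1], [0.5, 1.0, 2.0]
#   A, n, m = to_sparse((s, t, w), Float64; num_nodes=3)
#   A[1, 2] == 0.5 && n == 3 && m == 3    # true; A[s[k], t[k]] == w[k]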
25 changes: 11 additions & 14 deletions src/GNNGraphs/query.jl
@@ -131,35 +131,32 @@ adjacency_list(g::GNNGraph; dir=:out) = adjacency_list(g, 1:g.num_nodes; dir)


"""
adjacency_matrix(g::GNNGraph, T=eltype(g); dir=:out, weighted=true)
adjacency_matrix(g::GNNGraph, [T]; dir=:out, weighted=true)

Return the adjacency matrix `A` for the graph `g`.

If `dir=:out`, `A[i,j] > 0` denotes the presence of an edge from node `i` to node `j`.
If `dir=:in` instead, `A[i,j] > 0` denotes the presence of an edge from node `j` to node `i`.

User may specify the eltype `T` of the returned matrix.
The user can specify the eltype `T` of the returned matrix.

If `weighted=true`, `A` contains the edge weights if any are present; otherwise the elements of `A` are either 0 or 1.
"""
function Graphs.adjacency_matrix(g::GNNGraph{<:COO_T}, T::DataType=eltype(g); dir=:out, weighted=true)
if g.graph[1] isa CuVector
# TODO revisit after https://github.com/JuliaGPU/CUDA.jl/pull/1152
A, n, m = to_dense(g.graph, T; num_nodes=g.num_nodes, weighted)
else
A, n, m = to_sparse(g.graph, T; num_nodes=g.num_nodes, weighted)
end
@assert size(A) == (n, n)
function Graphs.adjacency_matrix(g::GNNGraph{<:COO_T}, T::DataType=eltype(g); dir=:out, weighted=true)
A, num_nodes, num_edges = to_sparse(g.graph, T; num_nodes=g.num_nodes, weighted)
@assert size(A) == (num_nodes, num_nodes)
return dir == :out ? A : A'
end

function Graphs.adjacency_matrix(g::GNNGraph{<:ADJMAT_T}, T::DataType=eltype(g); dir=:out, weighted=true)
function Graphs.adjacency_matrix(g::GNNGraph{<:ADJMAT_T}, T::DataType=eltype(g); dir=:out, weighted=true)
@assert dir ∈ [:in, :out]
A = g.graph
if !weighted
A = binarize(A)
end
A = T != eltype(A) ? T.(A) : A
A = convert_eltype(T, A)
return dir == :out ? A : A'
end
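
# A short usage sketch of the public API (illustrative 3-node cycle):
#   s, t = [1, 2, 3], [2, 3, 1]
#   g = GNNGraph(s, t)
#   A = adjacency_matrix(g, Float32)     # 3×3 sparse; A[i,j] > 0 iff edge i → j
#   adjacency_matrix(g; dir=:in) == A'   # the :in convention is the transpose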

@@ -177,7 +174,7 @@ function _get_edge_weight(g, edge_weight)
end

"""
degree(g::GNNGraph, T=nothing; dir=:out, edge_weight=true)
degree(g::GNNGraph, [T]; dir=:out, edge_weight=true)

Return a vector containing the degrees of the nodes in `g`.

@@ -234,7 +231,7 @@ function Graphs.degree(g::GNNGraph{<:ADJMAT_T}, T::TT=nothing; dir=:out, edge_weight=true)
if edge_weight === false
A = binarize(A)
end
A = eltype(A) != T ? T.(A) : A
A = convert_eltype(T, A)
return dir == :out ? vec(sum(A, dims=2)) :
dir == :in ? vec(sum(A, dims=1)) :
vec(sum(A, dims=1)) .+ vec(sum(A, dims=2))
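
# Following the docstring above, a hedged sketch (same 3-node cycle `g` as in
# the `adjacency_matrix` example):
#   degree(g)             # out-degrees: [1, 1, 1]
#   degree(g; dir=:in)    # in-degrees
#   degree(g, Float32)    # same as `degree(g)`, with Float32 eltype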
49 changes: 48 additions & 1 deletion src/GNNGraphs/utils.jl
@@ -98,6 +98,7 @@ ones_like(x, sz=size(x)) = ones_like(x, eltype(x), sz)
numnonzeros(a::AbstractSparseMatrix) = nnz(a)
numnonzeros(a::AbstractMatrix) = count(!=(0), a)


# each edge is represented by a number in
# 1:N^2
function edge_encoding(s, t, n; directed=true)
@@ -151,11 +152,56 @@

binarize(x) = map(>(0), x)

@non_differentiable numnonzeros(x...)
@non_differentiable binarize(x...)
@non_differentiable edge_encoding(x...)
@non_differentiable edge_decoding(x...)

convert_eltype(::Nothing, x) = x
convert_eltype(::Type{T}, x::AbstractArray{T}) where T = x
convert_eltype(::Type{T}, x::AbstractArray) where T = T.(x)
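# Dispatch notes: `convert_eltype(nothing, x)` is a no-op, and the
# `AbstractArray{T}` method avoids a copy when the eltype already matches;
# only a genuine mismatch pays for the broadcast `T.(x)`.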

_sparse(x::AbstractMatrix) = sparse(x)
_sparse(x::AbstractVector) = sparse(x)
_sparse(s, t, w, m, n) = sparse(s, t, w, m, n)

using CUDA.CUSPARSE: CuSparseMatrixCSR, AbstractCuSparseMatrix

# This is working around 2 issues:
# https://github.com/JuliaGPU/CUDA.jl/issues/1402
# https://github.com/JuliaGPU/CUDA.jl/issues/1407
function _sparse(s::AnyCuVector, t::AnyCuVector, w::AnyCuVector{T}, m, n) where T
p = sortperm(s) # issue CUDA#1407
s, t, w = s[p], t[p], w[p]
T.(sparse(s, t, Float32.(w), m, n))
end
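
# The `sortperm` hands CUSPARSE row-sorted COO buffers (CUDA#1407), and building
# the matrix in Float32 before converting back sidesteps the constructor's
# limited eltype support (CUDA#1402); both readings follow the issues cited above.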

# TODO https://github.com/JuliaGPU/CUDA.jl/issues/1403
Base.:*(x::AnyCuMatrix, y::AbstractCuSparseMatrix) = (y' * x')' |> CuMatrix
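
# CUSPARSE supports sparse × dense but not dense × sparse; since
# (y' * x')' == x * y, the product is computed in the supported order and the
# result materialized back into a dense CuMatrix.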

# Workaround https://github.com/JuliaGPU/CUDA.jl/issues/1406
Base.sum(x::AbstractCuSparseMatrix; dims=:) = cusparse_sum(x, Val(dims))

cusparse_sum(x, ::Val{:}) = sum(cusparse_sum(x, Val(1)))

function cusparse_sum(x::AbstractCuSparseMatrix, ::Val{1})
m, n = size(x)
v = ones_like(x, (1, m))
return v * x
end

function cusparse_sum(x::AbstractCuSparseMatrix, ::Val{2})
m, n = size(x)
v = ones_like(x, (n, 1))
return x * v
end
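
# The ones-vector products reproduce `sum`'s shapes:
#   sum(x; dims=1)  ->  1×n row of column sums    (ones(1, m) * x)
#   sum(x; dims=2)  ->  m×1 column of row sums    (x * ones(n, 1))
#   sum(x)          ->  scalar total (column sums, then sum)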

# # TODO remove this piracy when this is merged
# # https://github.com/JuliaGPU/CUDA.jl/pull/1401
# function CUDA.cu(x::SparseMatrixCSC)
# # Avoid casting to CuSparseMatrixCSC since it is not well supported
# CuSparseMatrixCSR(x)
# end

####################################
# FROM MLBASE.jl
@@ -214,4 +260,5 @@ function getobs!(buffers::Union{Tuple, NamedTuple},
getobs!(buffer, x, indices)
end
end
#######################################################

15 changes: 0 additions & 15 deletions src/msgpass.jl
@@ -189,11 +189,6 @@ function propagate(::typeof(copy_xj), g::GNNGraph, ::typeof(+), xi, xj::AbstractMatrix, e)
return xj * A
end
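
# Fast path: with A the unweighted adjacency matrix built earlier in this
# method, column i of xj * A is sum_j xj[:, j] * A[j, i], i.e. the summed
# features of i's in-neighbors, so copy_xj with (+) collapses into one matmul.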

## avoid the fast path on gpu until we have better cuda support
function propagate(::typeof(copy_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e)
propagate((xi,xj,e) -> copy_xj(xi,xj,e), g, +, xi, xj, e)
end

## E_MUL_XJ

# for weighted convolution
@@ -203,11 +198,6 @@ function propagate(::typeof(e_mul_xj), g::GNNGraph, ::typeof(+), xi, xj::AbstractMatrix, e::AbstractVector)
return xj * A
end

## avoid the fast path on gpu until we have better cuda support
function propagate(::typeof(e_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::AbstractVector)
propagate((xi,xj,e) -> e_mul_xj(xi,xj,e), g, +, xi, xj, e)
end

## W_MUL_XJ

# for weighted convolution
@@ -216,11 +206,6 @@ function propagate(::typeof(w_mul_xj), g::GNNGraph, ::typeof(+), xi, xj::AbstractMatrix, e::Nothing)
return xj * A
end

## avoid the fast path on gpu until we have better cuda support
function propagate(::typeof(w_mul_xj), g::GNNGraph{<:Union{COO_T,SPARSE_T}}, ::typeof(+), xi, xj::AnyCuMatrix, e::Nothing)
propagate((xi,xj,e) -> w_mul_xj(xi,xj,e), g, +, xi, xj, e)
end



