Commit 638624f
[NDTensors] Reorganize some GPU functions (#1352)
1 parent 54f53de · commit 638624f

36 files changed: +223 −98

+10 −14
@@ -1,28 +1,24 @@
-## Here we need an NDTensorCuArrayAdaptor because the CuArrayAdaptor provided by CUDA
-## converts 64 bit numbers to 32 bit. We cannot write `adapt(CuVector, x)` because this
-## Will not allow us to properly utilize the buffer preference without changing the value of
-## default_buffertype. Also `adapt(CuVector{<:Any, <:Any, Buffertype})` fails to work properly
-struct NDTensorCuArrayAdaptor{B} end
+using NDTensors.TypeParameterAccessors: TypeParameterAccessors
+using NDTensors.GPUArraysCoreExtensions: storagemode
+using NDTensors.CUDAExtensions: CUDAExtensions, CuArrayAdaptor
+
 ## TODO make this work for unified. This works but overwrites CUDA's adapt_storage. This fails for emptystorage...
-function cu(xs; unified::Bool=false)
+function CUDAExtensions.cu(xs; unified::Bool=false)
   return fmap(
-    x -> adapt(NDTensorCuArrayAdaptor{unified ? Mem.UnifiedBuffer : Mem.DeviceBuffer}(), x),
-    xs,
+    x -> adapt(CuArrayAdaptor{unified ? Mem.UnifiedBuffer : Mem.DeviceBuffer}(), x), xs
   )
 end

-buffertype(::NDTensorCuArrayAdaptor{B}) where {B} = B
-
-function Adapt.adapt_storage(adaptor::NDTensorCuArrayAdaptor, xs::AbstractArray)
+function Adapt.adapt_storage(adaptor::CuArrayAdaptor, xs::AbstractArray)
   ElT = eltype(xs)
-  BufT = buffertype(adaptor)
+  BufT = storagemode(adaptor)
   N = ndims(xs)
   return isbits(xs) ? xs : adapt(CuArray{ElT,N,BufT}, xs)
 end

 function NDTensors.adapt_storagetype(
-  adaptor::NDTensorCuArrayAdaptor, xs::Type{EmptyStorage{ElT,StoreT}}
+  adaptor::CuArrayAdaptor, xs::Type{EmptyStorage{ElT,StoreT}}
 ) where {ElT,StoreT}
-  BufT = buffertype(adaptor)
+  BufT = storagemode(adaptor)
   return NDTensors.emptytype(NDTensors.adapt_storagetype(CuVector{ElT,BufT}, StoreT))
 end
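
As context for this hunk, here is a minimal sketch of how the relocated `cu` is intended to be called (assumes NDTensors and CUDA.jl are installed, and that loading CUDA activates the NDTensorsCUDAExt extension; the input array is illustrative):

```julia
using CUDA: CUDA                    # loading CUDA activates NDTensorsCUDAExt
using NDTensors.CUDAExtensions: cu  # new home of `cu` after this commit

x = randn(Float64, 4, 4)
xd = cu(x)                # device-buffer copy; eltype stays Float64,
                          # unlike CUDA.jl's `cu`, which demotes to Float32
xu = cu(x; unified=true)  # request a unified-memory buffer instead
```
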
+1 −1
@@ -1,4 +1,4 @@
-import NDTensors: cu, similartype
+import NDTensors: similartype
 import NDTensors:
   ContractionProperties, _contract!, GemmBackend, auto_select_backend, _gemm!, iscu
 import NDTensors.SetParameters: nparameters, get_parameter, set_parameter, default_parameter

NDTensors/ext/NDTensorsCUDAExt/set_types.jl (+17)
@@ -38,3 +38,20 @@ default_parameter(::Type{<:CuArray}, ::Position{3}) = Mem.DeviceBuffer
 nparameters(::Type{<:CuArray}) = Val(3)

 SetParameters.unspecify_parameters(::Type{<:CuArray}) = CuArray
+
+using NDTensors.TypeParameterAccessors: TypeParameterAccessors
+using NDTensors.GPUArraysCoreExtensions: storagemode
+## TODO remove TypeParameterAccessors when SetParameters is removed
+function TypeParameterAccessors.position(::Type{<:CuArray}, ::typeof(eltype))
+  return TypeParameterAccessors.Position(1)
+end
+function TypeParameterAccessors.position(::Type{<:CuArray}, ::typeof(Base.ndims))
+  return TypeParameterAccessors.Position(2)
+end
+function TypeParameterAccessors.position(::Type{<:CuArray}, ::typeof(storagemode))
+  return TypeParameterAccessors.Position(3)
+end
+
+function TypeParameterAccessors.default_type_parameters(::Type{<:CuArray})
+  return (Float64, 1, CUDA.Mem.DeviceBuffer)
+end
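
A sketch of what these new overloads enable, using only names that appear in this diff (assumes CUDA.jl is installed): generic code can look up `CuArray`'s type parameters by accessor function instead of by numeric position.

```julia
using CUDA: CuArray, Mem
using NDTensors.TypeParameterAccessors: type_parameter, default_type_parameters
using NDTensors.GPUArraysCoreExtensions: storagemode

A = CuArray{Float64,2,Mem.DeviceBuffer}
type_parameter(A, eltype)         # Float64          (Position 1)
type_parameter(A, Base.ndims)     # 2                (Position 2)
storagemode(A)                    # Mem.DeviceBuffer (Position 3)
default_type_parameters(CuArray)  # (Float64, 1, CUDA.Mem.DeviceBuffer)
```
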

NDTensors/ext/NDTensorsMetalExt/adapt.jl (+6 −2)
@@ -1,10 +1,14 @@
-NDTensors.cpu(e::Exposed{<:MtlArray}) = adapt(Array, e)
+using NDTensors.MetalExtensions: MetalExtensions
+using NDTensors.GPUArraysCoreExtensions: GPUArraysCoreExtensions

-function mtl(xs; storage=DefaultStorageMode)
+GPUArraysCoreExtensions.cpu(e::Exposed{<:MtlArray}) = adapt(Array, e)
+
+function MetalExtensions.mtl(xs; storage=DefaultStorageMode)
   return adapt(set_storagemode(MtlArray, storage), xs)
 end

 # More general than the version in Metal.jl
+## TODO Rewrite this using a custom `MtlArrayAdaptor` which will be written in `MetalExtensions`.
 function Adapt.adapt_storage(arraytype::Type{<:MtlArray}, xs::AbstractArray)
   params = get_parameters(xs)
   arraytype_specified = specify_parameters(arraytype, params...)
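
A minimal usage sketch of the relocated `mtl` (assumes Metal.jl on Apple hardware so the NDTensorsMetalExt extension is active; the input vector is illustrative):

```julia
using Metal: Metal                    # loading Metal activates NDTensorsMetalExt
using NDTensors.MetalExtensions: mtl  # new home of `mtl` after this commit
using NDTensors.GPUArraysCoreExtensions: cpu

x = randn(Float32, 8)
xm = mtl(x)  # MtlVector with the default storage mode
cpu(xm)      # round-trip back to a CPU Array
```
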

NDTensors/ext/NDTensorsMetalExt/imports.jl (−1)
@@ -1,4 +1,3 @@
-import NDTensors: mtl
 import NDTensors.SetParameters: nparameters, get_parameter, set_parameter, default_parameter

 using NDTensors.Expose: Exposed, unexpose, expose

NDTensors/ext/NDTensorsMetalExt/set_types.jl (+18 −2)
@@ -39,9 +39,25 @@ default_parameter(::Type{<:MtlArray}, ::Position{3}) = Metal.DefaultStorageMode

 nparameters(::Type{<:MtlArray}) = Val(3)

+using NDTensors.TypeParameterAccessors: TypeParameterAccessors
+using NDTensors.GPUArraysCoreExtensions: storagemode
 # Metal-specific type parameter setting
-function set_storagemode(arraytype::Type{<:MtlArray}, storagemode)
-  return set_parameter(arraytype, Position(3), storagemode)
+function set_storagemode(arraytype::Type{<:MtlArray}, param)
+  return TypeParameterAccessors.set_type_parameter(arraytype, storagemode, param)
 end

 SetParameters.unspecify_parameters(::Type{<:MtlArray}) = MtlArray
+## TODO remove TypeParameterAccessors when SetParameters is removed
+function TypeParameterAccessors.position(::Type{<:MtlArray}, ::typeof(eltype))
+  return TypeParameterAccessors.Position(1)
+end
+function TypeParameterAccessors.position(::Type{<:MtlArray}, ::typeof(Base.ndims))
+  return TypeParameterAccessors.Position(2)
+end
+function TypeParameterAccessors.position(::Type{<:MtlArray}, ::typeof(storagemode))
+  return TypeParameterAccessors.Position(3)
+end
+
+function TypeParameterAccessors.default_type_parameters(::Type{<:MtlArray})
+  return (Float32, 1, Metal.DefaultStorageMode)
+end
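
For illustration, a sketch of the new setter path (grounded in the definitions above; `Metal.DefaultStorageMode` is the only storage mode named in this diff):

```julia
using Metal: Metal, MtlArray
using NDTensors.TypeParameterAccessors: TypeParameterAccessors
using NDTensors.GPUArraysCoreExtensions: storagemode

# set_storagemode now routes through TypeParameterAccessors.set_type_parameter,
# writing whichever parameter is registered at the `storagemode` position (3):
TypeParameterAccessors.set_type_parameter(MtlArray, storagemode, Metal.DefaultStorageMode)
```
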

NDTensors/ext/examples/NDTensorCUDA.jl (+3 −2)
@@ -1,5 +1,6 @@
 using NDTensors
-using CUDA: CUDA, CuVector, cu, reshape
+using NDTensors.CUDAExtensions: cu
+using CUDA: CUDA, CuVector, reshape
 using ITensors:
   Index, ITensor, randomMPO, randomMPS, inner, orthogonalize, qr, siteinds, svd
 using Test: @test
@@ -8,7 +9,7 @@ using Zygote: gradient
 function main()
   # using ITensorGPU
   cpu = NDTensors.cpu
-  gpu = NDTensors.cu
+  gpu = cu
   # Here is an example of how to utilize NDTensors based tensors with CUDA datatypes
   i = Index(2)
   j = Index(5)

NDTensors/ext/examples/NDTensorMetal.jl (+3 −2)
@@ -1,13 +1,14 @@
-using Metal: MtlVector, mtl
+using Metal: MtlVector
 using NDTensors
+using NDTensors.MetalExtensions: mtl

 using ITensors: ITensor, Index, randomITensor
 using Test: @test
 using Zygote: gradient

 function main()
   cpu = NDTensors.cpu
-  gpu = NDTensors.mtl
+  gpu = mtl
   # Here is an example of how to utilize NDTensors based tensors with CUDA datatypes
   i = Index(20)
   j = Index(5)

NDTensors/src/NDTensors.jl (−52)
@@ -1,56 +1,4 @@
 module NDTensors
-# TODO: List types, macros, and functions being used.
-using Adapt
-using Base.Threads
-using Compat
-using Dictionaries
-using Folds
-using GPUArraysCore
-using InlineStrings
-using Random
-using LinearAlgebra
-using StaticArrays
-using Functors
-using HDF5
-using SimpleTraits
-using SplitApplyCombine
-using Strided
-using TimerOutputs
-using TupleTools
-
-for lib in [
-  :AlgorithmSelection,
-  :AllocateData,
-  :BaseExtensions,
-  :UnspecifiedTypes,
-  :TypeParameterAccessors,
-  :Expose,
-  :SetParameters,
-  :BroadcastMapConversion,
-  :RankFactorization,
-  :Sectors,
-  :GradedAxes,
-  :TensorAlgebra,
-  :SparseArrayInterface,
-  :SparseArrayDOKs,
-  :DiagonalArrays,
-  :BlockSparseArrays,
-  :NamedDimsArrays,
-  :SmallVectors,
-  :SortedSets,
-  :TagSets,
-  :UnallocatedArrays,
-]
-  include("lib/$(lib)/src/$(lib).jl")
-  @eval using .$lib: $lib
-end
-
-using Base: @propagate_inbounds, ReshapedArray, DimOrInd, OneTo
-
-using Base.Cartesian: @nexprs
-
-using Base.Threads: @spawn
-
 #####################################
 # Imports and exports
 #

NDTensors/src/adapt.jl (+5 −7)
@@ -1,14 +1,12 @@
+using .GPUArraysCoreExtensions: GPUArraysCoreExtensions
 adapt_structure(to, x::TensorStorage) = setdata(x, adapt(to, data(x)))
 adapt_structure(to, x::Tensor) = setstorage(x, adapt(to, storage(x)))

 ## use unwrap cpu here because Expose is included before NDTensors
-Expose.cpu(eltype::Type{<:Number}, x) = fmap(x -> adapt(Array{eltype}, x), x)
-Expose.cpu(x) = fmap(x -> adapt(Array, x), x)
-
-# Implemented in `ITensorGPU` and NDTensorCUDA
-function cu end
-
-function mtl end
+function GPUArraysCoreExtensions.cpu(eltype::Type{<:Number}, x)
+  return fmap(x -> adapt(Array{eltype}, x), x)
+end
+GPUArraysCoreExtensions.cpu(x) = fmap(x -> adapt(Array, x), x)

 adapt_structure(to::Type{<:Number}, x::TensorStorage) = setdata(x, convert.(to, data(x)))
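
A short sketch of the relocated `cpu`, based on the two methods above (the input array is illustrative):

```julia
using NDTensors.GPUArraysCoreExtensions: cpu

x = ones(Float32, 3)
cpu(x)           # fmap-adapts any array leaves back to CPU `Array`s
cpu(Float64, x)  # same, but also converts the element type to Float64
```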

NDTensors/src/imports.jl (+58 −1)
@@ -3,7 +3,64 @@
 # similar to:
 # https://github.com/JuliaGPU/KernelAbstractions.jl
 # https://github.com/oschulz/HeterogeneousComputing.jl
-using .Expose: cpu
+
+using Adapt
+using Base.Threads
+using Compat
+using Dictionaries
+using Folds
+using GPUArraysCore
+using InlineStrings
+using Random
+using LinearAlgebra
+using StaticArrays
+using Functors
+using HDF5
+using SimpleTraits
+using SplitApplyCombine
+using Strided
+using TimerOutputs
+using TupleTools
+
+for lib in [
+  :AlgorithmSelection,
+  :AllocateData,
+  :BaseExtensions,
+  :UnspecifiedTypes,
+  :TypeParameterAccessors,
+  :GPUArraysCoreExtensions,
+  :CUDAExtensions,
+  :MetalExtensions,
+  :Expose,
+  :SetParameters,
+  :BroadcastMapConversion,
+  :RankFactorization,
+  :Sectors,
+  :GradedAxes,
+  :TensorAlgebra,
+  :SparseArrayInterface,
+  :SparseArrayDOKs,
+  :DiagonalArrays,
+  :BlockSparseArrays,
+  :NamedDimsArrays,
+  :SmallVectors,
+  :SortedSets,
+  :TagSets,
+  :UnallocatedArrays,
+]
+  include("lib/$(lib)/src/$(lib).jl")
+  @eval using .$lib: $lib
+end
+
+using Base: @propagate_inbounds, ReshapedArray, DimOrInd, OneTo
+
+using Base.Cartesian: @nexprs
+
+using Base.Threads: @spawn
+
+using .CUDAExtensions: cu
+using .MetalExtensions: mtl
+using .GPUArraysCoreExtensions: cpu

 import Base:
   # Types
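
The net effect of the reorganization, sketched as the new import paths: the GPU entry points are now owned by internal libraries rather than defined as stubs in `NDTensors` itself.

```julia
using NDTensors.CUDAExtensions: cu
using NDTensors.MetalExtensions: mtl
using NDTensors.GPUArraysCoreExtensions: cpu, storagemode
```
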
@@ -0,0 +1,2 @@
+style = "blue"
+indent = 2

@@ -0,0 +1,4 @@
+module CUDAExtensions
+include("cuda.jl")
+
+end

@@ -0,0 +1,15 @@
+using NDTensors.TypeParameterAccessors: TypeParameterAccessors
+using NDTensors.GPUArraysCoreExtensions: storagemode
+# Implemented in `ITensorGPU` and NDTensorCUDA
+function cu end
+
+## Here we need an NDTensorCuArrayAdaptor because the CuArrayAdaptor provided by CUDA
+## converts 64 bit numbers to 32 bit. We cannot write `adapt(CuVector, x)` because this
+## Will not allow us to properly utilize the buffer preference without changing the value of
+## default_buffertype. Also `adapt(CuVector{<:Any, <:Any, Buffertype})` fails to work properly
+struct CuArrayAdaptor{B} end
+
+## TODO remove TypeParameterAccessors when SetParameters is removed
+function TypeParameterAccessors.position(::Type{<:CuArrayAdaptor}, ::typeof(storagemode))
+  return TypeParameterAccessors.Position(1)
+end
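
Given the `Position(1)` definition above, `storagemode` can read the adaptor's buffer parameter from either the type or an instance; a sketch (assumes CUDA.jl for `Mem`):

```julia
using CUDA: Mem
using NDTensors.CUDAExtensions: CuArrayAdaptor
using NDTensors.GPUArraysCoreExtensions: storagemode

storagemode(CuArrayAdaptor{Mem.UnifiedBuffer})   # Mem.UnifiedBuffer (from the type)
storagemode(CuArrayAdaptor{Mem.DeviceBuffer}())  # Mem.DeviceBuffer (from an instance)
```
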
@@ -0,0 +1,9 @@
+@eval module $(gensym())
+using Test: @testset, @test
+using NDTensors.CUDAExtensions: cu, CuArrayAdaptor
+using NDTensors.GPUArraysCoreExtensions: storagemode
+@testset "cu function exists" begin
+  @test cu isa Function
+  @test storagemode(CuArrayAdaptor{1}) == 1
+end
+end

NDTensors/src/lib/Expose/src/exposed.jl (+1)
@@ -8,6 +8,7 @@ expose(object) = Exposed{unwrap_array_type(object),typeof(object)}(object)

 unexpose(E::Exposed) = E.object

+## TODO remove TypeParameterAccessors when SetParameters is removed
 TypeParameterAccessors.parenttype(type::Type{<:Exposed}) = parameter(type, parenttype)
 function TypeParameterAccessors.position(::Type{<:Exposed}, ::typeof(parenttype))
   return TypeParameterAccessors.Position(1)

NDTensors/src/lib/Expose/src/functions/abstractarray.jl (+3 −1)
@@ -1,10 +1,12 @@
+using NDTensors.GPUArraysCoreExtensions: GPUArraysCoreExtensions, cpu
+
 parent(E::Exposed) = parent(unexpose(E))

 transpose(E::Exposed) = transpose(unexpose(E))

 adjoint(E::Exposed) = adjoint(unexpose(E))

-cpu(E::Exposed) = cpu(unexpose(E))
+GPUArraysCoreExtensions.cpu(E::Exposed) = cpu(unexpose(E))

 getindex(E::Exposed) = unexpose(E)[]

@@ -0,0 +1,2 @@
+style = "blue"
+indent = 2

@@ -0,0 +1,7 @@
+name = "GPUArraysCoreExtensions"
+
+[deps]
+GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
+
+[compat]
+GPUArraysCore = "0.1"

@@ -0,0 +1,4 @@
+module GPUArraysCoreExtensions
+include("gpuarrayscore.jl")
+
+end

@@ -0,0 +1,10 @@
+using NDTensors.TypeParameterAccessors: TypeParameterAccessors, type_parameter
+
+function storagemode(object)
+  return storagemode(typeof(object))
+end
+function storagemode(type::Type)
+  return type_parameter(type, storagemode)
+end
+
+function cpu end
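
To show the mechanism in isolation: `storagemode` works for any type that registers a position for it. A self-contained sketch (the `MyBuf` type here is hypothetical, not part of this commit):

```julia
using NDTensors.TypeParameterAccessors: TypeParameterAccessors
using NDTensors.GPUArraysCoreExtensions: storagemode

struct MyBuf{S} end  # hypothetical single-parameter container

# Register which type parameter `storagemode` should read:
function TypeParameterAccessors.position(::Type{<:MyBuf}, ::typeof(storagemode))
  return TypeParameterAccessors.Position(1)
end

storagemode(MyBuf{:unified})    # :unified, read from the type
storagemode(MyBuf{:unified}())  # same, via the instance method
```
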
@@ -0,0 +1,7 @@
+@eval module $(gensym())
+using Test: @testset, @test
+using NDTensors.GPUArraysCoreExtensions: storagemode
+@testset "Test Base" begin
+  @test storagemode isa Function
+end
+end

@@ -0,0 +1,2 @@
+style = "blue"
+indent = 2

@@ -0,0 +1,4 @@
+module MetalExtensions
+include("metal.jl")
+
+end

@@ -0,0 +1,2 @@
+# Implemented in `ITensorGPU` and NDTensorCUDA
+function mtl end
