diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt b/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt
new file mode 100644
index 00000000..b16b59a8
--- /dev/null
+++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt
@@ -0,0 +1,3 @@
+can_634
+Si2
+lshp1561
diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj b/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj
index 2dd0c406..ea52ed24 100644
--- a/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj
+++ b/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj
@@ -23,6 +23,7 @@
+
diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs
new file mode 100644
index 00000000..f8c7b880
--- /dev/null
+++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs
@@ -0,0 +1,142 @@
+module GraphBLAS.FSharp.Benchmarks.Matrix.Kronecker
+
+open System.IO
+open BenchmarkDotNet.Attributes
+open Brahma.FSharp
+open GraphBLAS.FSharp
+open GraphBLAS.FSharp.IO
+open GraphBLAS.FSharp.Backend.Quotes
+open GraphBLAS.FSharp.Objects
+open GraphBLAS.FSharp.Objects.ClContextExtensions
+open GraphBLAS.FSharp.Benchmarks
+
+[]
+[]
+[]
+[)>]
+type Benchmarks<'elem when 'elem : struct>(
+ buildFunToBenchmark,
+ converter: string -> 'elem,
+ converterBool,
+ buildMatrix) =
+
+ let mutable funToBenchmark = None
+
+ let mutable matrix = Unchecked.defaultof>
+
+ let mutable matrixHost = Unchecked.defaultof<_>
+
+ member val ResultMatrix = Unchecked.defaultof option> with get, set
+
+ []
+ member val OclContextInfo = Unchecked.defaultof with get, set
+
+ []
+ member val InputMatrixReader = Unchecked.defaultof with get, set
+
+ member this.OclContext: ClContext = (fst this.OclContextInfo).ClContext
+ member this.WorkGroupSize = snd this.OclContextInfo
+
+ member this.Processor =
+ let p = (fst this.OclContextInfo).Queue
+ p.Error.Add(fun e -> failwithf "%A" e)
+ p
+
+ static member AvailableContexts = Utils.availableContexts
+
+ static member InputMatrixProviderBuilder pathToConfig =
+ let datasetFolder = ""
+ pathToConfig
+ |> Utils.getMatricesFilenames
+ |> Seq.map
+ (fun matrixFilename ->
+ printfn "%A" matrixFilename
+
+ match Path.GetExtension matrixFilename with
+ | ".mtx" ->
+ MtxReader(Utils.getFullPathToMatrix datasetFolder matrixFilename)
+ | _ -> failwith "Unsupported matrix format")
+
+ member this.FunToBenchmark =
+ match funToBenchmark with
+ | None ->
+ let x = buildFunToBenchmark this.OclContext this.WorkGroupSize
+ funToBenchmark <- Some x
+ x
+ | Some x -> x
+
+ member this.ReadMatrix (reader: MtxReader) =
+ let converter =
+ match reader.Field with
+ | Pattern -> converterBool
+ | _ -> converter
+
+ reader.ReadMatrix converter
+
+ member this.Mxm() =
+ this.ResultMatrix <- this.FunToBenchmark this.Processor DeviceOnly matrix matrix
+
+ member this.ClearInputMatrices() =
+ matrix.Dispose this.Processor
+
+ member this.ClearResult() =
+ match this.ResultMatrix with
+ | Some matrix -> matrix.Dispose this.Processor
+ | None -> ()
+
+ member this.ReadMatrices() =
+ matrixHost <- this.ReadMatrix this.InputMatrixReader
+
+ member this.LoadMatricesToGPU () =
+ matrix <- buildMatrix this.OclContext matrixHost
+
+ abstract member GlobalSetup : unit -> unit
+
+ abstract member Benchmark : unit -> unit
+
+ abstract member IterationCleanup : unit -> unit
+
+ abstract member GlobalCleanup : unit -> unit
+
+module WithoutTransfer =
+ type Benchmark<'elem when 'elem : struct>(
+ buildFunToBenchmark,
+ converter: string -> 'elem,
+ converterBool,
+ buildMatrix) =
+
+ inherit Benchmarks<'elem>(
+ buildFunToBenchmark,
+ converter,
+ converterBool,
+ buildMatrix)
+
+ []
+ override this.GlobalSetup() =
+ this.ReadMatrices()
+ this.LoadMatricesToGPU()
+
+ []
+ override this.Benchmark() =
+ this.Mxm()
+ this.Processor.PostAndReply(Msg.MsgNotifyMe)
+
+ []
+ override this.IterationCleanup () =
+ this.ClearResult()
+
+ []
+ override this.GlobalCleanup () =
+ this.ClearInputMatrices()
+
+ type Float32() =
+
+ inherit Benchmark(
+ Operations.kronecker (ArithmeticOperations.float32MulOption),
+ float32,
+ (fun _ -> Utils.nextSingle (System.Random())),
+ (fun context matrix -> ClMatrix.CSR <| matrix.ToCSR.ToDevice context)
+ )
+
+ static member InputMatrixProvider =
+ Benchmarks<_>.InputMatrixProviderBuilder "Kronecker.txt"
diff --git a/src/GraphBLAS-sharp.Backend/Common/ClArray.fs b/src/GraphBLAS-sharp.Backend/Common/ClArray.fs
index ad6b3caf..6bea80a1 100644
--- a/src/GraphBLAS-sharp.Backend/Common/ClArray.fs
+++ b/src/GraphBLAS-sharp.Backend/Common/ClArray.fs
@@ -728,11 +728,11 @@ module ClArray =
bound<'a, int> Search.Bin.lowerBound clContext
///
- /// Gets the value at the specified position from the input array.
+ /// Gets the value at the specified position from the input array and insert it into given ClCell.
///
/// OpenCL context.
/// Should be a power of 2 and greater than 1.
- let item<'a> (clContext: ClContext) workGroupSize =
+ let itemTo<'a> (clContext: ClContext) workGroupSize =
let kernel =
<@ fun (ndRange: Range1D) index (array: ClArray<'a>) (result: ClCell<'a>) ->
@@ -744,21 +744,37 @@ module ClArray =
let program = clContext.Compile kernel
- fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) ->
+ fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) (output: ClCell<'a>) ->
if index < 0 || index >= array.Length then
failwith "Index out of range"
- let result =
- clContext.CreateClCell Unchecked.defaultof<'a>
-
let kernel = program.GetKernel()
let ndRange = Range1D.CreateValid(1, workGroupSize)
- processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange index array result))
+ processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange index array output))
processor.Post(Msg.CreateRunMsg<_, _> kernel)
+ ///
+ /// Gets the value at the specified position from the input array.
+ ///
+ /// OpenCL context.
+ /// Should be a power of 2 and greater than 1.
+ let item<'a> (clContext: ClContext) workGroupSize =
+
+ let itemTo = itemTo clContext workGroupSize
+
+ fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) ->
+
+ if index < 0 || index >= array.Length then
+ failwith "Index out of range"
+
+ let result =
+ clContext.CreateClCell Unchecked.defaultof<'a>
+
+ itemTo processor index array result
+
result
///
diff --git a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs
index 9ff810bf..8c5835ba 100644
--- a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs
+++ b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs
@@ -171,7 +171,7 @@ module internal Kronecker =
let private setPositions<'c when 'c: struct> (clContext: ClContext) workGroupSize =
let setPositions =
- <@ fun (ndRange: Range1D) rowCount columnCount startIndex (rowOffset: ClCell) (columnOffset: ClCell) (bitmap: ClArray) (values: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) ->
+ <@ fun (ndRange: Range1D) rowCount columnCount startIndex (rowOffset: int) (columnOffset: int) (bitmap: ClArray) (values: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) ->
let gid = ndRange.GlobalID0
@@ -184,8 +184,8 @@ module internal Kronecker =
let index = startIndex + bitmap.[gid] - 1
- resultRows.[index] <- rowIndex + rowOffset.Value
- resultColumns.[index] <- columnIndex + columnOffset.Value
+ resultRows.[index] <- rowIndex + rowOffset
+ resultColumns.[index] <- columnIndex + columnOffset
resultValues.[index] <- values.[gid] @>
let kernel = clContext.Compile <| setPositions
@@ -202,9 +202,6 @@ module internal Kronecker =
let kernel = kernel.GetKernel()
- let rowOffset = rowOffset |> clContext.CreateClCell
- let columnOffset = columnOffset |> clContext.CreateClCell
-
processor.Post(
Msg.MsgSetArguments
(fun () ->
@@ -224,23 +221,20 @@ module internal Kronecker =
processor.Post(Msg.CreateRunMsg<_, _> kernel)
- rowOffset.Free processor
- columnOffset.Free processor
-
(sum.ToHostAndFree processor) + startIndex
let private copyToResult (clContext: ClContext) workGroupSize =
let copyToResult =
- <@ fun (ndRange: Range1D) startIndex sourceLength (rowOffset: ClCell) (columnOffset: ClCell) (sourceRows: ClArray) (sourceColumns: ClArray) (sourceValues: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) ->
+ <@ fun (ndRange: Range1D) startIndex sourceLength (rowOffset: int) (columnOffset: int) (sourceRows: ClArray) (sourceColumns: ClArray) (sourceValues: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) ->
let gid = ndRange.GlobalID0
if gid < sourceLength then
let index = startIndex + gid
- resultRows.[index] <- rowOffset.Value + sourceRows.[gid]
- resultColumns.[index] <- columnOffset.Value + sourceColumns.[gid]
+ resultRows.[index] <- rowOffset + sourceRows.[gid]
+ resultColumns.[index] <- columnOffset + sourceColumns.[gid]
resultValues.[index] <- sourceValues.[gid] @>
let kernel = clContext.Compile <| copyToResult
@@ -252,9 +246,6 @@ module internal Kronecker =
let kernel = kernel.GetKernel()
- let rowOffset = rowOffset |> clContext.CreateClCell
- let columnOffset = columnOffset |> clContext.CreateClCell
-
processor.Post(
Msg.MsgSetArguments
(fun () ->
@@ -274,14 +265,11 @@ module internal Kronecker =
processor.Post(Msg.CreateRunMsg<_, _> kernel)
- rowOffset.Free processor
- columnOffset.Free processor
-
let private insertZero (clContext: ClContext) workGroupSize =
let copy = copyToResult clContext workGroupSize
- fun queue startIndex (zeroCounts: int list array) (matrixZero: COO<'c>) resultMatrix ->
+ fun queue startIndex (zeroCounts: ClArray array) (matrixZero: COO<'c>) resultMatrix ->
let rowCount = zeroCounts.Length
@@ -298,32 +286,31 @@ module internal Kronecker =
startIndex <- startIndex + matrixZero.NNZ
- let rec insertInRowRec zeroCounts row column =
- match zeroCounts with
- | [] -> ()
- | h :: tl ->
- insertMany row column h
+ for row in 0 .. rowCount - 1 do
+ let zeroCountInRow = zeroCounts.[row].ToHostAndFree queue
- insertInRowRec tl row (h + column + 1)
+ let mutable column = 0
- for row in 0 .. rowCount - 1 do
- insertInRowRec zeroCounts.[row] row 0
+ for count in zeroCountInRow do
+ insertMany row column count
+
+ column <- column + count + 1
let private insertNonZero (clContext: ClContext) workGroupSize op =
- let item = ClArray.item clContext workGroupSize
+ let itemTo = ClArray.itemTo clContext workGroupSize
let preparePositions =
preparePositions clContext workGroupSize op
let setPositions = setPositions clContext workGroupSize
- fun queue (rowsEdges: (int * int) array) (matrixRight: CSR<'b>) (leftValues: ClArray<'a>) (leftColsHost: int array) (resultMatrix: COO<'c>) ->
+ fun queue (rowBoundaries: (int * int) array) (matrixRight: CSR<'b>) (leftValues: ClArray<'a>) (leftColsHost: int array) (resultMatrix: COO<'c>) ->
let setPositions =
setPositions queue matrixRight.RowCount matrixRight.ColumnCount
- let rowCount = rowsEdges.Length
+ let rowCount = rowBoundaries.Length
let length =
matrixRight.RowCount * matrixRight.ColumnCount
@@ -336,11 +323,15 @@ module internal Kronecker =
let mutable startIndex = 0
+ let value =
+ clContext.CreateClCell Unchecked.defaultof<'a>
+
for row in 0 .. rowCount - 1 do
- let leftEdge, rightEdge = rowsEdges.[row]
+ let leftEdge, rightEdge = rowBoundaries.[row]
for i in leftEdge .. rightEdge do
- let value = item queue i leftValues
+ itemTo queue i leftValues value
+
let column = leftColsHost.[i]
let rowOffset = row * matrixRight.RowCount
@@ -348,21 +339,88 @@ module internal Kronecker =
preparePositions queue value matrixRight mappedMatrix bitmap
- value.Free queue
-
startIndex <- setPositions rowOffset columnOffset startIndex resultMatrix mappedMatrix bitmap
+ value.Free queue
bitmap.Free queue
mappedMatrix.Free queue
startIndex
+ let private countZeroElements (clContext: ClContext) workGroupSize =
+
+ let countZeroElementsInRow =
+ <@ fun (ndRange: Range1D) (firstIndex: int) (lastIndex: int) (columnCount: int) (columns: ClArray) (result: ClArray) ->
+
+ let gid = ndRange.GlobalID0
+
+ let nnzInRow = lastIndex - firstIndex + 1
+
+ if gid <= nnzInRow then
+
+ if nnzInRow = 0 then
+ result.[0] <- columnCount
+
+ elif gid = nnzInRow then
+ result.[nnzInRow] <- columnCount - columns.[lastIndex] - 1
+
+ elif gid = 0 then
+ result.[0] <- columns.[firstIndex]
+
+ else
+ result.[gid] <-
+ columns.[firstIndex + gid]
+ - columns.[firstIndex + gid - 1]
+ - 1 @>
+
+ let kernel = clContext.Compile countZeroElementsInRow
+
+ fun (queue: MailboxProcessor<_>) (matrix: CSR<_>) (rowBoundaries: (int * int) array) ->
+
+ let kernel = kernel.GetKernel()
+
+ let (zeroCounts: ClArray array) = Array.zeroCreate matrix.RowCount
+
+ for row in 0 .. matrix.RowCount - 1 do
+
+ let firstIndex = fst rowBoundaries.[row]
+ let lastIndex = snd rowBoundaries.[row]
+
+ let nnzInRow = lastIndex - firstIndex + 1
+ let length = nnzInRow + 1
+
+ let ndRange =
+ Range1D.CreateValid(length, workGroupSize)
+
+ let result =
+ clContext.CreateClArrayWithSpecificAllocationMode(DeviceOnly, length)
+
+ queue.Post(
+ Msg.MsgSetArguments
+ (fun () ->
+ kernel.KernelFunc ndRange firstIndex lastIndex matrix.ColumnCount matrix.Columns result)
+ )
+
+ queue.Post(Msg.CreateRunMsg<_, _>(kernel))
+
+ zeroCounts.[row] <- result
+
+ zeroCounts
+
let private mapAll<'a, 'b, 'c when 'a: struct and 'b: struct and 'c: struct and 'c: equality>
(clContext: ClContext)
workGroupSize
(op: Expr<'a option -> 'b option -> 'c option>)
=
+ let pairwise = ClArray.pairwise clContext workGroupSize
+
+ let mapInPlace =
+ ClArray.mapInPlace <@ fun (a, b) -> (a, b - 1) @> clContext workGroupSize
+
+ let countZeroElements =
+ countZeroElements clContext workGroupSize
+
let insertNonZero = insertNonZero clContext workGroupSize op
let insertZero = insertZero clContext workGroupSize
@@ -386,35 +444,22 @@ module internal Kronecker =
RowCount = matrixLeft.RowCount * matrixRight.RowCount
ColumnCount = matrixLeft.ColumnCount * matrixRight.ColumnCount }
- let leftRowPointers = matrixLeft.RowPointers.ToHost queue
let leftColumns = matrixLeft.Columns.ToHost queue
- let nnzInRows =
- leftRowPointers
- |> Array.pairwise
- |> Array.map (fun (fst, snd) -> snd - fst)
-
- let rowsEdges =
- leftRowPointers
- |> Array.pairwise
- |> Array.map (fun (fst, snd) -> (fst, snd - 1))
-
- let (zeroCounts: int list array) = Array.zeroCreate matrixLeft.RowCount
-
- { 0 .. matrixLeft.RowCount - 1 }
- |> Seq.iter2
- (fun edges i ->
- zeroCounts.[i] <-
- leftColumns.[fst edges..snd edges]
- |> Array.toList
- |> List.insertAt 0 -1
- |> List.insertAt (nnzInRows.[i] + 1) matrixLeft.ColumnCount
- |> List.pairwise
- |> List.map (fun (fstCol, sndCol) -> sndCol - fstCol - 1))
- rowsEdges
+ let pairsOfRowPointers =
+ pairwise queue DeviceOnly matrixLeft.RowPointers
+ |> Option.defaultWith
+ (fun () -> failwith "The state of the matrix is broken. The length of the rowPointers must be >= 2")
+
+ mapInPlace queue pairsOfRowPointers
+
+ let rowBoundaries = pairsOfRowPointers.ToHostAndFree queue
+
+ let zeroCounts =
+ countZeroElements queue matrixLeft rowBoundaries
let startIndex =
- insertNonZero queue rowsEdges matrixRight matrixLeft.Values leftColumns resultMatrix
+ insertNonZero queue rowBoundaries matrixRight matrixLeft.Values leftColumns resultMatrix
matrixZero
|> Option.iter (fun m -> insertZero queue startIndex zeroCounts m resultMatrix)