diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt b/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt new file mode 100644 index 00000000..b16b59a8 --- /dev/null +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Configs/Kronecker.txt @@ -0,0 +1,3 @@ +can_634 +Si2 +lshp1561 diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj b/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj index 2dd0c406..ea52ed24 100644 --- a/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/GraphBLAS-sharp.Benchmarks.fsproj @@ -23,6 +23,7 @@ + diff --git a/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs new file mode 100644 index 00000000..f8c7b880 --- /dev/null +++ b/benchmarks/GraphBLAS-sharp.Benchmarks/Matrix/Kronecker.fs @@ -0,0 +1,142 @@ +module GraphBLAS.FSharp.Benchmarks.Matrix.Kronecker + +open System.IO +open BenchmarkDotNet.Attributes +open Brahma.FSharp +open GraphBLAS.FSharp +open GraphBLAS.FSharp.IO +open GraphBLAS.FSharp.Backend.Quotes +open GraphBLAS.FSharp.Objects +open GraphBLAS.FSharp.Objects.ClContextExtensions +open GraphBLAS.FSharp.Benchmarks + +[] +[] +[] +[)>] +type Benchmarks<'elem when 'elem : struct>( + buildFunToBenchmark, + converter: string -> 'elem, + converterBool, + buildMatrix) = + + let mutable funToBenchmark = None + + let mutable matrix = Unchecked.defaultof> + + let mutable matrixHost = Unchecked.defaultof<_> + + member val ResultMatrix = Unchecked.defaultof option> with get, set + + [] + member val OclContextInfo = Unchecked.defaultof with get, set + + [] + member val InputMatrixReader = Unchecked.defaultof with get, set + + member this.OclContext: ClContext = (fst this.OclContextInfo).ClContext + member this.WorkGroupSize = snd this.OclContextInfo + + member this.Processor = + let p = (fst this.OclContextInfo).Queue + p.Error.Add(fun e -> failwithf "%A" e) + p + + static member AvailableContexts = Utils.availableContexts + + static member InputMatrixProviderBuilder pathToConfig = + let datasetFolder = "" + pathToConfig + |> Utils.getMatricesFilenames + |> Seq.map + (fun matrixFilename -> + printfn "%A" matrixFilename + + match Path.GetExtension matrixFilename with + | ".mtx" -> + MtxReader(Utils.getFullPathToMatrix datasetFolder matrixFilename) + | _ -> failwith "Unsupported matrix format") + + member this.FunToBenchmark = + match funToBenchmark with + | None -> + let x = buildFunToBenchmark this.OclContext this.WorkGroupSize + funToBenchmark <- Some x + x + | Some x -> x + + member this.ReadMatrix (reader: MtxReader) = + let converter = + match reader.Field with + | Pattern -> converterBool + | _ -> converter + + reader.ReadMatrix converter + + member this.Mxm() = + this.ResultMatrix <- this.FunToBenchmark this.Processor DeviceOnly matrix matrix + + member this.ClearInputMatrices() = + matrix.Dispose this.Processor + + member this.ClearResult() = + match this.ResultMatrix with + | Some matrix -> matrix.Dispose this.Processor + | None -> () + + member this.ReadMatrices() = + matrixHost <- this.ReadMatrix this.InputMatrixReader + + member this.LoadMatricesToGPU () = + matrix <- buildMatrix this.OclContext matrixHost + + abstract member GlobalSetup : unit -> unit + + abstract member Benchmark : unit -> unit + + abstract member IterationCleanup : unit -> unit + + abstract member GlobalCleanup : unit -> unit + +module WithoutTransfer = + type Benchmark<'elem when 'elem : struct>( + buildFunToBenchmark, + converter: string -> 'elem, + converterBool, + buildMatrix) = + + inherit Benchmarks<'elem>( + buildFunToBenchmark, + converter, + converterBool, + buildMatrix) + + [] + override this.GlobalSetup() = + this.ReadMatrices() + this.LoadMatricesToGPU() + + [] + override this.Benchmark() = + this.Mxm() + this.Processor.PostAndReply(Msg.MsgNotifyMe) + + [] + override this.IterationCleanup () = + this.ClearResult() + + [] + override this.GlobalCleanup () = + this.ClearInputMatrices() + + type Float32() = + + inherit Benchmark( + Operations.kronecker (ArithmeticOperations.float32MulOption), + float32, + (fun _ -> Utils.nextSingle (System.Random())), + (fun context matrix -> ClMatrix.CSR <| matrix.ToCSR.ToDevice context) + ) + + static member InputMatrixProvider = + Benchmarks<_>.InputMatrixProviderBuilder "Kronecker.txt" diff --git a/src/GraphBLAS-sharp.Backend/Common/ClArray.fs b/src/GraphBLAS-sharp.Backend/Common/ClArray.fs index ad6b3caf..6bea80a1 100644 --- a/src/GraphBLAS-sharp.Backend/Common/ClArray.fs +++ b/src/GraphBLAS-sharp.Backend/Common/ClArray.fs @@ -728,11 +728,11 @@ module ClArray = bound<'a, int> Search.Bin.lowerBound clContext /// - /// Gets the value at the specified position from the input array. + /// Gets the value at the specified position from the input array and insert it into given ClCell. /// /// OpenCL context. /// Should be a power of 2 and greater than 1. - let item<'a> (clContext: ClContext) workGroupSize = + let itemTo<'a> (clContext: ClContext) workGroupSize = let kernel = <@ fun (ndRange: Range1D) index (array: ClArray<'a>) (result: ClCell<'a>) -> @@ -744,21 +744,37 @@ module ClArray = let program = clContext.Compile kernel - fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) -> + fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) (output: ClCell<'a>) -> if index < 0 || index >= array.Length then failwith "Index out of range" - let result = - clContext.CreateClCell Unchecked.defaultof<'a> - let kernel = program.GetKernel() let ndRange = Range1D.CreateValid(1, workGroupSize) - processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange index array result)) + processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange index array output)) processor.Post(Msg.CreateRunMsg<_, _> kernel) + /// + /// Gets the value at the specified position from the input array. + /// + /// OpenCL context. + /// Should be a power of 2 and greater than 1. + let item<'a> (clContext: ClContext) workGroupSize = + + let itemTo = itemTo clContext workGroupSize + + fun (processor: MailboxProcessor<_>) (index: int) (array: ClArray<'a>) -> + + if index < 0 || index >= array.Length then + failwith "Index out of range" + + let result = + clContext.CreateClCell Unchecked.defaultof<'a> + + itemTo processor index array result + result /// diff --git a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs index 9ff810bf..8c5835ba 100644 --- a/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs +++ b/src/GraphBLAS-sharp.Backend/Operations/Kronecker.fs @@ -171,7 +171,7 @@ module internal Kronecker = let private setPositions<'c when 'c: struct> (clContext: ClContext) workGroupSize = let setPositions = - <@ fun (ndRange: Range1D) rowCount columnCount startIndex (rowOffset: ClCell) (columnOffset: ClCell) (bitmap: ClArray) (values: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) -> + <@ fun (ndRange: Range1D) rowCount columnCount startIndex (rowOffset: int) (columnOffset: int) (bitmap: ClArray) (values: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) -> let gid = ndRange.GlobalID0 @@ -184,8 +184,8 @@ module internal Kronecker = let index = startIndex + bitmap.[gid] - 1 - resultRows.[index] <- rowIndex + rowOffset.Value - resultColumns.[index] <- columnIndex + columnOffset.Value + resultRows.[index] <- rowIndex + rowOffset + resultColumns.[index] <- columnIndex + columnOffset resultValues.[index] <- values.[gid] @> let kernel = clContext.Compile <| setPositions @@ -202,9 +202,6 @@ module internal Kronecker = let kernel = kernel.GetKernel() - let rowOffset = rowOffset |> clContext.CreateClCell - let columnOffset = columnOffset |> clContext.CreateClCell - processor.Post( Msg.MsgSetArguments (fun () -> @@ -224,23 +221,20 @@ module internal Kronecker = processor.Post(Msg.CreateRunMsg<_, _> kernel) - rowOffset.Free processor - columnOffset.Free processor - (sum.ToHostAndFree processor) + startIndex let private copyToResult (clContext: ClContext) workGroupSize = let copyToResult = - <@ fun (ndRange: Range1D) startIndex sourceLength (rowOffset: ClCell) (columnOffset: ClCell) (sourceRows: ClArray) (sourceColumns: ClArray) (sourceValues: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) -> + <@ fun (ndRange: Range1D) startIndex sourceLength (rowOffset: int) (columnOffset: int) (sourceRows: ClArray) (sourceColumns: ClArray) (sourceValues: ClArray<'c>) (resultRows: ClArray) (resultColumns: ClArray) (resultValues: ClArray<'c>) -> let gid = ndRange.GlobalID0 if gid < sourceLength then let index = startIndex + gid - resultRows.[index] <- rowOffset.Value + sourceRows.[gid] - resultColumns.[index] <- columnOffset.Value + sourceColumns.[gid] + resultRows.[index] <- rowOffset + sourceRows.[gid] + resultColumns.[index] <- columnOffset + sourceColumns.[gid] resultValues.[index] <- sourceValues.[gid] @> let kernel = clContext.Compile <| copyToResult @@ -252,9 +246,6 @@ module internal Kronecker = let kernel = kernel.GetKernel() - let rowOffset = rowOffset |> clContext.CreateClCell - let columnOffset = columnOffset |> clContext.CreateClCell - processor.Post( Msg.MsgSetArguments (fun () -> @@ -274,14 +265,11 @@ module internal Kronecker = processor.Post(Msg.CreateRunMsg<_, _> kernel) - rowOffset.Free processor - columnOffset.Free processor - let private insertZero (clContext: ClContext) workGroupSize = let copy = copyToResult clContext workGroupSize - fun queue startIndex (zeroCounts: int list array) (matrixZero: COO<'c>) resultMatrix -> + fun queue startIndex (zeroCounts: ClArray array) (matrixZero: COO<'c>) resultMatrix -> let rowCount = zeroCounts.Length @@ -298,32 +286,31 @@ module internal Kronecker = startIndex <- startIndex + matrixZero.NNZ - let rec insertInRowRec zeroCounts row column = - match zeroCounts with - | [] -> () - | h :: tl -> - insertMany row column h + for row in 0 .. rowCount - 1 do + let zeroCountInRow = zeroCounts.[row].ToHostAndFree queue - insertInRowRec tl row (h + column + 1) + let mutable column = 0 - for row in 0 .. rowCount - 1 do - insertInRowRec zeroCounts.[row] row 0 + for count in zeroCountInRow do + insertMany row column count + + column <- column + count + 1 let private insertNonZero (clContext: ClContext) workGroupSize op = - let item = ClArray.item clContext workGroupSize + let itemTo = ClArray.itemTo clContext workGroupSize let preparePositions = preparePositions clContext workGroupSize op let setPositions = setPositions clContext workGroupSize - fun queue (rowsEdges: (int * int) array) (matrixRight: CSR<'b>) (leftValues: ClArray<'a>) (leftColsHost: int array) (resultMatrix: COO<'c>) -> + fun queue (rowBoundaries: (int * int) array) (matrixRight: CSR<'b>) (leftValues: ClArray<'a>) (leftColsHost: int array) (resultMatrix: COO<'c>) -> let setPositions = setPositions queue matrixRight.RowCount matrixRight.ColumnCount - let rowCount = rowsEdges.Length + let rowCount = rowBoundaries.Length let length = matrixRight.RowCount * matrixRight.ColumnCount @@ -336,11 +323,15 @@ module internal Kronecker = let mutable startIndex = 0 + let value = + clContext.CreateClCell Unchecked.defaultof<'a> + for row in 0 .. rowCount - 1 do - let leftEdge, rightEdge = rowsEdges.[row] + let leftEdge, rightEdge = rowBoundaries.[row] for i in leftEdge .. rightEdge do - let value = item queue i leftValues + itemTo queue i leftValues value + let column = leftColsHost.[i] let rowOffset = row * matrixRight.RowCount @@ -348,21 +339,88 @@ module internal Kronecker = preparePositions queue value matrixRight mappedMatrix bitmap - value.Free queue - startIndex <- setPositions rowOffset columnOffset startIndex resultMatrix mappedMatrix bitmap + value.Free queue bitmap.Free queue mappedMatrix.Free queue startIndex + let private countZeroElements (clContext: ClContext) workGroupSize = + + let countZeroElementsInRow = + <@ fun (ndRange: Range1D) (firstIndex: int) (lastIndex: int) (columnCount: int) (columns: ClArray) (result: ClArray) -> + + let gid = ndRange.GlobalID0 + + let nnzInRow = lastIndex - firstIndex + 1 + + if gid <= nnzInRow then + + if nnzInRow = 0 then + result.[0] <- columnCount + + elif gid = nnzInRow then + result.[nnzInRow] <- columnCount - columns.[lastIndex] - 1 + + elif gid = 0 then + result.[0] <- columns.[firstIndex] + + else + result.[gid] <- + columns.[firstIndex + gid] + - columns.[firstIndex + gid - 1] + - 1 @> + + let kernel = clContext.Compile countZeroElementsInRow + + fun (queue: MailboxProcessor<_>) (matrix: CSR<_>) (rowBoundaries: (int * int) array) -> + + let kernel = kernel.GetKernel() + + let (zeroCounts: ClArray array) = Array.zeroCreate matrix.RowCount + + for row in 0 .. matrix.RowCount - 1 do + + let firstIndex = fst rowBoundaries.[row] + let lastIndex = snd rowBoundaries.[row] + + let nnzInRow = lastIndex - firstIndex + 1 + let length = nnzInRow + 1 + + let ndRange = + Range1D.CreateValid(length, workGroupSize) + + let result = + clContext.CreateClArrayWithSpecificAllocationMode(DeviceOnly, length) + + queue.Post( + Msg.MsgSetArguments + (fun () -> + kernel.KernelFunc ndRange firstIndex lastIndex matrix.ColumnCount matrix.Columns result) + ) + + queue.Post(Msg.CreateRunMsg<_, _>(kernel)) + + zeroCounts.[row] <- result + + zeroCounts + let private mapAll<'a, 'b, 'c when 'a: struct and 'b: struct and 'c: struct and 'c: equality> (clContext: ClContext) workGroupSize (op: Expr<'a option -> 'b option -> 'c option>) = + let pairwise = ClArray.pairwise clContext workGroupSize + + let mapInPlace = + ClArray.mapInPlace <@ fun (a, b) -> (a, b - 1) @> clContext workGroupSize + + let countZeroElements = + countZeroElements clContext workGroupSize + let insertNonZero = insertNonZero clContext workGroupSize op let insertZero = insertZero clContext workGroupSize @@ -386,35 +444,22 @@ module internal Kronecker = RowCount = matrixLeft.RowCount * matrixRight.RowCount ColumnCount = matrixLeft.ColumnCount * matrixRight.ColumnCount } - let leftRowPointers = matrixLeft.RowPointers.ToHost queue let leftColumns = matrixLeft.Columns.ToHost queue - let nnzInRows = - leftRowPointers - |> Array.pairwise - |> Array.map (fun (fst, snd) -> snd - fst) - - let rowsEdges = - leftRowPointers - |> Array.pairwise - |> Array.map (fun (fst, snd) -> (fst, snd - 1)) - - let (zeroCounts: int list array) = Array.zeroCreate matrixLeft.RowCount - - { 0 .. matrixLeft.RowCount - 1 } - |> Seq.iter2 - (fun edges i -> - zeroCounts.[i] <- - leftColumns.[fst edges..snd edges] - |> Array.toList - |> List.insertAt 0 -1 - |> List.insertAt (nnzInRows.[i] + 1) matrixLeft.ColumnCount - |> List.pairwise - |> List.map (fun (fstCol, sndCol) -> sndCol - fstCol - 1)) - rowsEdges + let pairsOfRowPointers = + pairwise queue DeviceOnly matrixLeft.RowPointers + |> Option.defaultWith + (fun () -> failwith "The state of the matrix is broken. The length of the rowPointers must be >= 2") + + mapInPlace queue pairsOfRowPointers + + let rowBoundaries = pairsOfRowPointers.ToHostAndFree queue + + let zeroCounts = + countZeroElements queue matrixLeft rowBoundaries let startIndex = - insertNonZero queue rowsEdges matrixRight matrixLeft.Values leftColumns resultMatrix + insertNonZero queue rowBoundaries matrixRight matrixLeft.Values leftColumns resultMatrix matrixZero |> Option.iter (fun m -> insertZero queue startIndex zeroCounts m resultMatrix)