Skip to content

Commit 8c06823

Browse files
authored
Merge pull request #2 from artemgl/coo-element-wise-addition-optimization
COO element-wise addition optimization
2 parents 897fa4b + 1a3bac6 commit 8c06823

3 files changed

Lines changed: 146 additions & 107 deletions

File tree

benchmarks/GraphBLAS-sharp.Benchmarks/BenchmarksEWiseAdd.fs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ type BoolConfig = Config<bool>
4343
type EWiseAddBenchmarks() =
4444
[<ParamsSource("AvaliableContexts")>]
4545
member val OclContext = Unchecked.defaultof<ClContext> with get, set
46-
46+
4747
[<IterationCleanup>]
4848
member this.ClearBuffers() =
4949
let (ClContext context) = this.OclContext

src/GraphBLAS-sharp/Implementations.fs

Lines changed: 86 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ open Helpers
88
open FSharp.Quotations.Evaluator
99
open Brahma.FSharp.OpenCL.WorkflowBuilder.Basic
1010
open Brahma.FSharp.OpenCL.WorkflowBuilder.Evaluation
11+
open Toolbox
1112

1213
type COOFormat<'a> = {
1314
Rows: int[]
@@ -175,6 +176,7 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
175176
(mask: Mask2D option)
176177
(semiring: Semiring<'a>) : OpenCLEvaluation<Matrix<'a>> =
177178

179+
let workGroupSize = Toolbox.workGroupSize
178180
let (BinaryOp append) = semiring.PlusMonoid.Append
179181
let zero = semiring.PlusMonoid.Zero
180182

@@ -193,11 +195,13 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
193195

194196
let allRows = Array.zeroCreate <| firstRows.Length + secondRows.Length
195197
let allColumns = Array.zeroCreate <| firstColumns.Length + secondColumns.Length
196-
let allValues = Array.init (firstValues.Length + secondValues.Length) (fun _ -> zero)
198+
let allValues = Array.create (firstValues.Length + secondValues.Length) zero
197199

198200
let longSide = firstRows.Length
199201
let shortSide = secondRows.Length
200202

203+
let allRowsLength = allRows.Length
204+
201205
let createSortedConcatenation =
202206
<@
203207
fun (ndRange: _1D)
@@ -213,38 +217,42 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
213217

214218
let i = ndRange.GlobalID0
215219

216-
let f n = if 0 > n + 1 - shortSide then 0 else n + 1 - shortSide
217-
let mutable leftEdge = f i
218-
// if 0 > i + 1 - shortSide then 0 else i + 1 - shortSide
219-
220-
let g n = if n > longSide - 1 then longSide - 1 else n
221-
let mutable rightEdge = g i
222-
// if i > longSide - 1 then longSide - 1 else i
223-
224-
while leftEdge <= rightEdge do
225-
let middleIdx = (leftEdge + rightEdge) / 2
226-
let firstRow, firstColumn = firstRowsBuffer.[middleIdx], firstColumnsBuffer.[middleIdx]
227-
let secondRow, secondColumn = secondRowsBuffer.[i - middleIdx], secondColumnsBuffer.[i - middleIdx]
228-
if firstRow < secondRow || firstRow = secondRow && firstColumn < secondColumn then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
229-
230-
let boundaryX, boundaryY = rightEdge, i - leftEdge
231-
let firstRow, firstColumn = firstRowsBuffer.[boundaryX], firstColumnsBuffer.[boundaryX]
232-
let secondRow, secondColumn = secondRowsBuffer.[boundaryY], secondColumnsBuffer.[boundaryY]
233-
234-
if boundaryX < 0 || boundaryY >= 0 && (firstRow < secondRow || firstRow = secondRow && firstColumn < secondColumn) then
235-
allRowsBuffer.[i] <- secondRowsBuffer.[boundaryY]
236-
allColumnsBuffer.[i] <- secondColumnsBuffer.[boundaryY]
237-
allValuesBuffer.[i] <- secondValuesBuffer.[boundaryY]
238-
else
239-
allRowsBuffer.[i] <- firstRowsBuffer.[boundaryX]
240-
allColumnsBuffer.[i] <- firstColumnsBuffer.[boundaryX]
241-
allValuesBuffer.[i] <- firstValuesBuffer.[boundaryX]
220+
if i < allRowsLength then
221+
let f n = if 0 > n + 1 - shortSide then 0 else n + 1 - shortSide
222+
let mutable leftEdge = f i
223+
224+
let g n = if n > longSide - 1 then longSide - 1 else n
225+
let mutable rightEdge = g i
226+
227+
while leftEdge <= rightEdge do
228+
let middleIdx = (leftEdge + rightEdge) / 2
229+
let firstRow = firstRowsBuffer.[middleIdx]
230+
let firstColumn = firstColumnsBuffer.[middleIdx]
231+
let secondRow = secondRowsBuffer.[i - middleIdx]
232+
let secondColumn = secondColumnsBuffer.[i - middleIdx]
233+
if firstRow < secondRow || firstRow = secondRow && firstColumn < secondColumn then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
234+
235+
let boundaryX = rightEdge
236+
let boundaryY = i - leftEdge
237+
let firstRow = firstRowsBuffer.[boundaryX]
238+
let firstColumn = firstColumnsBuffer.[boundaryX]
239+
let secondRow = secondRowsBuffer.[boundaryY]
240+
let secondColumn = secondColumnsBuffer.[boundaryY]
241+
242+
if boundaryX < 0 || boundaryY >= 0 && (firstRow < secondRow || firstRow = secondRow && firstColumn < secondColumn) then
243+
allRowsBuffer.[i] <- secondRow
244+
allColumnsBuffer.[i] <- secondColumn
245+
allValuesBuffer.[i] <- secondValuesBuffer.[boundaryY]
246+
else
247+
allRowsBuffer.[i] <- firstRow
248+
allColumnsBuffer.[i] <- firstColumn
249+
allValuesBuffer.[i] <- firstValuesBuffer.[boundaryX]
242250
@>
243251

244252
let createSortedConcatenation =
245253
opencl {
246254
let binder kernelP =
247-
let ndRange = _1D(allRows.Length)
255+
let ndRange = _1D(workSize allRows.Length, workGroupSize)
248256
kernelP
249257
ndRange
250258
firstRows
@@ -259,7 +267,7 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
259267
do! RunCommand createSortedConcatenation binder
260268
}
261269

262-
let auxiliaryArray = Array.init allRows.Length (fun _ -> 1)
270+
let auxiliaryArray = Array.create allRows.Length 1
263271

264272
let fillAuxiliaryArray =
265273
<@
@@ -269,17 +277,19 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
269277
(allValuesBuffer: 'a[])
270278
(auxiliaryArrayBuffer: int[]) ->
271279

272-
let i = ndRange.GlobalID0
280+
let i = ndRange.GlobalID0 + 1
273281

274-
if allRowsBuffer.[i] = allRowsBuffer.[i + 1] && allColumnsBuffer.[i] = allColumnsBuffer.[i + 1] then
275-
auxiliaryArrayBuffer.[i + 1] <- 0
276-
allValuesBuffer.[i] <- (%plus) allValuesBuffer.[i] allValuesBuffer.[i + 1]
282+
if i < allRowsLength && allRowsBuffer.[i - 1] = allRowsBuffer.[i] && allColumnsBuffer.[i - 1] = allColumnsBuffer.[i] then
283+
auxiliaryArrayBuffer.[i] <- 0
284+
let localResultBuffer = (%plus) allValuesBuffer.[i - 1] allValuesBuffer.[i]
285+
//Drop explicit zeroes
286+
if localResultBuffer = zero then auxiliaryArrayBuffer.[i] <- 0 else allValuesBuffer.[i] <- localResultBuffer
277287
@>
278288

279289
let fillAuxiliaryArray =
280290
opencl {
281291
let binder kernelP =
282-
let ndRange = _1D(allRows.Length - 1)
292+
let ndRange = _1D(workSize (allRows.Length - 1), workGroupSize)
283293
kernelP
284294
ndRange
285295
allRows
@@ -289,6 +299,8 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
289299
do! RunCommand fillAuxiliaryArray binder
290300
}
291301

302+
let auxiliaryArrayLength = auxiliaryArray.Length
303+
292304
let createUnion =
293305
<@
294306
fun (ndRange: _1D)
@@ -303,7 +315,7 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
303315

304316
let i = ndRange.GlobalID0
305317

306-
if auxiliaryArrayBuffer.[i] = 1 then
318+
if i < auxiliaryArrayLength && auxiliaryArrayBuffer.[i] = 1 then
307319
let index = prefixSumArrayBuffer.[i] - 1
308320

309321
resultRowsBuffer.[index] <- allRowsBuffer.[i]
@@ -313,13 +325,13 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
313325

314326
let resultRows = Array.zeroCreate allRows.Length
315327
let resultColumns = Array.zeroCreate allColumns.Length
316-
let resultValues = Array.init allValues.Length (fun _ -> zero)
328+
let resultValues = Array.create allValues.Length zero
317329

318330
let createUnion =
319331
opencl {
320-
let! prefixSumArray = Toolbox.prefixSum auxiliaryArray
332+
let! prefixSumArray = Toolbox.prefixSum2 auxiliaryArray
321333
let binder kernelP =
322-
let ndRange = _1D(auxiliaryArray.Length)
334+
let ndRange = _1D(workSize auxiliaryArray.Length, workGroupSize)
323335
kernelP
324336
ndRange
325337
allRows
@@ -337,7 +349,6 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
337349
do! createSortedConcatenation
338350
do! filterThroughMask
339351
do! fillAuxiliaryArray
340-
do! Toolbox.EWiseAdd.dropExplicitZeroes zero allValues auxiliaryArray
341352
do! createUnion
342353

343354
return upcast COOMatrix<'a>(this.RowCount, this.ColumnCount, resultRows, resultColumns, resultValues)
@@ -436,11 +447,13 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
436447
}
437448

438449
let allIndices = Array.zeroCreate <| firstIndices.Length + secondIndices.Length
439-
let allValues = Array.init (firstValues.Length + secondValues.Length) (fun _ -> zero)
450+
let allValues = Array.create (firstValues.Length + secondValues.Length) zero
440451

441452
let longSide = firstIndices.Length
442453
let shortSide = secondIndices.Length
443454

455+
let allIndicesLength = allIndices.Length
456+
444457
let createSortedConcatenation =
445458
<@
446459
fun (ndRange: _1D)
@@ -452,33 +465,33 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
452465
(allValuesBuffer: 'a[]) ->
453466

454467
let i = ndRange.GlobalID0
455-
456-
let f n = if 0 > n + 1 - shortSide then 0 else n + 1 - shortSide
457-
let mutable leftEdge = f i
458-
// if 0 > i + 1 - shortSide then 0 else i + 1 - shortSide
459-
460-
let g n = if n > longSide - 1 then longSide - 1 else n
461-
let mutable rightEdge = g i
462-
// if i > longSide - 1 then longSide - 1 else i
463-
464-
while leftEdge <= rightEdge do
465-
let middleIdx = (leftEdge + rightEdge) / 2
466-
if firstIndicesBuffer.[middleIdx] < secondIndicesBuffer.[i - middleIdx] then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
467-
468-
let boundaryX, boundaryY = rightEdge, i - leftEdge
469-
470-
if boundaryX < 0 || boundaryY >= 0 && firstIndicesBuffer.[boundaryX] < secondIndicesBuffer.[boundaryY] then
471-
allIndicesBuffer.[i] <- secondIndicesBuffer.[boundaryY]
472-
allValuesBuffer.[i] <- secondValuesBuffer.[boundaryY]
473-
else
474-
allIndicesBuffer.[i] <- firstIndicesBuffer.[boundaryX]
475-
allValuesBuffer.[i] <- firstValuesBuffer.[boundaryX]
468+
if i < allIndicesLength then
469+
let f n = if 0 > n + 1 - shortSide then 0 else n + 1 - shortSide
470+
let mutable leftEdge = f i
471+
472+
let g n = if n > longSide - 1 then longSide - 1 else n
473+
let mutable rightEdge = g i
474+
475+
while leftEdge <= rightEdge do
476+
let middleIdx = (leftEdge + rightEdge) / 2
477+
if firstIndicesBuffer.[middleIdx] < secondIndicesBuffer.[i - middleIdx] then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
478+
479+
let boundaryX, boundaryY = rightEdge, i - leftEdge
480+
let firstIndex = firstIndicesBuffer.[boundaryX]
481+
let secondIndex = secondIndicesBuffer.[boundaryY]
482+
483+
if boundaryX < 0 || boundaryY >= 0 && firstIndex < secondIndex then
484+
allIndicesBuffer.[i] <- secondIndex
485+
allValuesBuffer.[i] <- secondValuesBuffer.[boundaryY]
486+
else
487+
allIndicesBuffer.[i] <- firstIndex
488+
allValuesBuffer.[i] <- firstValuesBuffer.[boundaryX]
476489
@>
477490

478491
let createSortedConcatenation =
479492
opencl {
480493
let binder kernelP =
481-
let ndRange = _1D(allIndices.Length)
494+
let ndRange = _1D(workSize allIndices.Length, workGroupSize)
482495
kernelP
483496
ndRange
484497
firstIndices
@@ -490,7 +503,7 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
490503
do! RunCommand createSortedConcatenation binder
491504
}
492505

493-
let auxiliaryArray = Array.init allIndices.Length (fun _ -> 1)
506+
let auxiliaryArray = Array.create allIndices.Length 1
494507

495508
let fillAuxiliaryArray =
496509
<@
@@ -501,15 +514,16 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
501514

502515
let i = ndRange.GlobalID0
503516

504-
if allIndicesBuffer.[i] = allIndicesBuffer.[i + 1] then
517+
if i + 1 < allIndicesLength && allIndicesBuffer.[i] = allIndicesBuffer.[i + 1] then
505518
auxiliaryArrayBuffer.[i + 1] <- 0
506-
allValuesBuffer.[i] <- (%plus) allValuesBuffer.[i] allValuesBuffer.[i + 1]
519+
let localResultBuffer = (%plus) allValuesBuffer.[i] allValuesBuffer.[i + 1]
520+
if localResultBuffer = zero then auxiliaryArrayBuffer.[i] <- 0 else allValuesBuffer.[i] <- localResultBuffer
507521
@>
508522

509523
let fillAuxiliaryArray =
510524
opencl {
511525
let binder kernelP =
512-
let ndRange = _1D(allIndices.Length - 1)
526+
let ndRange = _1D(workSize (allIndices.Length - 1), workGroupSize)
513527
kernelP
514528
ndRange
515529
allIndices
@@ -518,6 +532,8 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
518532
do! RunCommand fillAuxiliaryArray binder
519533
}
520534

535+
let auxiliaryArrayLength = auxiliaryArray.Length
536+
521537
let createUnion =
522538
<@
523539
fun (ndRange: _1D)
@@ -530,21 +546,21 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
530546

531547
let i = ndRange.GlobalID0
532548

533-
if auxiliaryArrayBuffer.[i] = 1 then
549+
if i < auxiliaryArrayLength && auxiliaryArrayBuffer.[i] = 1 then
534550
let index = prefixSumArrayBuffer.[i] - 1
535551

536552
resultIndicesBuffer.[index] <- allIndicesBuffer.[i]
537553
resultValuesBuffer.[index] <- allValuesBuffer.[i]
538554
@>
539555

540556
let resultIndices = Array.zeroCreate allIndices.Length
541-
let resultValues = Array.init allValues.Length (fun _ -> zero)
557+
let resultValues = Array.create allValues.Length zero
542558

543559
let createUnion =
544560
opencl {
545-
let! prefixSumArray = Toolbox.prefixSum auxiliaryArray
561+
let! prefixSumArray = Toolbox.prefixSum2 auxiliaryArray
546562
let binder kernelP =
547-
let ndRange = _1D(auxiliaryArray.Length)
563+
let ndRange = _1D(workSize auxiliaryArray.Length, workGroupSize)
548564
kernelP
549565
ndRange
550566
allIndices
@@ -560,7 +576,6 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
560576
do! createSortedConcatenation
561577
do! filterThroughMask
562578
do! fillAuxiliaryArray
563-
do! Toolbox.EWiseAdd.dropExplicitZeroes zero allValues auxiliaryArray
564579
do! createUnion
565580

566581
return upcast SparseVector<'a>(this.Size, resultIndices, resultValues)

0 commit comments

Comments
 (0)