@@ -8,6 +8,7 @@ open Helpers
88open FSharp.Quotations .Evaluator
99open Brahma.FSharp .OpenCL .WorkflowBuilder .Basic
1010open Brahma.FSharp .OpenCL .WorkflowBuilder .Evaluation
11+ open Toolbox
1112
1213type COOFormat < 'a > = {
1314 Rows: int []
@@ -175,6 +176,7 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
175176 ( mask : Mask2D option )
176177 ( semiring : Semiring < 'a >) : OpenCLEvaluation < Matrix < 'a >> =
177178
179+ let workGroupSize = Toolbox.workGroupSize
178180 let ( BinaryOp append ) = semiring.PlusMonoid.Append
179181 let zero = semiring.PlusMonoid.Zero
180182
@@ -193,11 +195,13 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
193195
194196 let allRows = Array.zeroCreate <| firstRows.Length + secondRows.Length
195197 let allColumns = Array.zeroCreate <| firstColumns.Length + secondColumns.Length
196- let allValues = Array.init ( firstValues.Length + secondValues.Length) ( fun _ -> zero)
198+ let allValues = Array.create ( firstValues.Length + secondValues.Length) zero
197199
198200 let longSide = firstRows.Length
199201 let shortSide = secondRows.Length
200202
203+ let allRowsLength = allRows.Length
204+
201205 let createSortedConcatenation =
202206 <@
203207 fun ( ndRange : _1D )
@@ -213,38 +217,42 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
213217
214218 let i = ndRange.GlobalID0
215219
216- let f n = if 0 > n + 1 - shortSide then 0 else n + 1 - shortSide
217- let mutable leftEdge = f i
218- // if 0 > i + 1 - shortSide then 0 else i + 1 - shortSide
219-
220- let g n = if n > longSide - 1 then longSide - 1 else n
221- let mutable rightEdge = g i
222- // if i > longSide - 1 then longSide - 1 else i
223-
224- while leftEdge <= rightEdge do
225- let middleIdx = ( leftEdge + rightEdge) / 2
226- let firstRow , firstColumn = firstRowsBuffer.[ middleIdx], firstColumnsBuffer.[ middleIdx]
227- let secondRow , secondColumn = secondRowsBuffer.[ i - middleIdx], secondColumnsBuffer.[ i - middleIdx]
228- if firstRow < secondRow || firstRow = secondRow && firstColumn < secondColumn then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
229-
230- let boundaryX , boundaryY = rightEdge, i - leftEdge
231- let firstRow , firstColumn = firstRowsBuffer.[ boundaryX], firstColumnsBuffer.[ boundaryX]
232- let secondRow , secondColumn = secondRowsBuffer.[ boundaryY], secondColumnsBuffer.[ boundaryY]
233-
234- if boundaryX < 0 || boundaryY >= 0 && ( firstRow < secondRow || firstRow = secondRow && firstColumn < secondColumn) then
235- allRowsBuffer.[ i] <- secondRowsBuffer.[ boundaryY]
236- allColumnsBuffer.[ i] <- secondColumnsBuffer.[ boundaryY]
237- allValuesBuffer.[ i] <- secondValuesBuffer.[ boundaryY]
238- else
239- allRowsBuffer.[ i] <- firstRowsBuffer.[ boundaryX]
240- allColumnsBuffer.[ i] <- firstColumnsBuffer.[ boundaryX]
241- allValuesBuffer.[ i] <- firstValuesBuffer.[ boundaryX]
220+ if i < allRowsLength then
221+ let f n = if 0 > n + 1 - shortSide then 0 else n + 1 - shortSide
222+ let mutable leftEdge = f i
223+
224+ let g n = if n > longSide - 1 then longSide - 1 else n
225+ let mutable rightEdge = g i
226+
227+ while leftEdge <= rightEdge do
228+ let middleIdx = ( leftEdge + rightEdge) / 2
229+ let firstRow = firstRowsBuffer.[ middleIdx]
230+ let firstColumn = firstColumnsBuffer.[ middleIdx]
231+ let secondRow = secondRowsBuffer.[ i - middleIdx]
232+ let secondColumn = secondColumnsBuffer.[ i - middleIdx]
233+ if firstRow < secondRow || firstRow = secondRow && firstColumn < secondColumn then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
234+
235+ let boundaryX = rightEdge
236+ let boundaryY = i - leftEdge
237+ let firstRow = firstRowsBuffer.[ boundaryX]
238+ let firstColumn = firstColumnsBuffer.[ boundaryX]
239+ let secondRow = secondRowsBuffer.[ boundaryY]
240+ let secondColumn = secondColumnsBuffer.[ boundaryY]
241+
242+ if boundaryX < 0 || boundaryY >= 0 && ( firstRow < secondRow || firstRow = secondRow && firstColumn < secondColumn) then
243+ allRowsBuffer.[ i] <- secondRow
244+ allColumnsBuffer.[ i] <- secondColumn
245+ allValuesBuffer.[ i] <- secondValuesBuffer.[ boundaryY]
246+ else
247+ allRowsBuffer.[ i] <- firstRow
248+ allColumnsBuffer.[ i] <- firstColumn
249+ allValuesBuffer.[ i] <- firstValuesBuffer.[ boundaryX]
242250 @>
243251
244252 let createSortedConcatenation =
245253 opencl {
246254 let binder kernelP =
247- let ndRange = _ 1D( allRows.Length)
255+ let ndRange = _ 1D( workSize allRows.Length, workGroupSize )
248256 kernelP
249257 ndRange
250258 firstRows
@@ -259,7 +267,7 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
259267 do ! RunCommand createSortedConcatenation binder
260268 }
261269
262- let auxiliaryArray = Array.init allRows.Length ( fun _ -> 1 )
270+ let auxiliaryArray = Array.create allRows.Length 1
263271
264272 let fillAuxiliaryArray =
265273 <@
@@ -269,17 +277,19 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
269277 ( allValuesBuffer : 'a [])
270278 ( auxiliaryArrayBuffer : int []) ->
271279
272- let i = ndRange.GlobalID0
280+ let i = ndRange.GlobalID0 + 1
273281
274- if allRowsBuffer.[ i] = allRowsBuffer.[ i + 1 ] && allColumnsBuffer.[ i] = allColumnsBuffer.[ i + 1 ] then
275- auxiliaryArrayBuffer.[ i + 1 ] <- 0
276- allValuesBuffer.[ i] <- (% plus) allValuesBuffer.[ i] allValuesBuffer.[ i + 1 ]
282+ if i < allRowsLength && allRowsBuffer.[ i - 1 ] = allRowsBuffer.[ i] && allColumnsBuffer.[ i - 1 ] = allColumnsBuffer.[ i] then
283+ auxiliaryArrayBuffer.[ i] <- 0
284+ let localResultBuffer = (% plus) allValuesBuffer.[ i - 1 ] allValuesBuffer.[ i]
285+ // Entry i is the duplicate and was just discarded, so the merged sum belongs to the surviving entry i - 1; that entry is discarded too when the sum is an explicit zero.
286+ if localResultBuffer = zero then auxiliaryArrayBuffer.[ i - 1 ] <- 0 else allValuesBuffer.[ i - 1 ] <- localResultBuffer
277287 @>
278288
279289 let fillAuxiliaryArray =
280290 opencl {
281291 let binder kernelP =
282- let ndRange = _ 1D( allRows.Length - 1 )
292+ let ndRange = _ 1D( workSize ( allRows.Length - 1 ), workGroupSize )
283293 kernelP
284294 ndRange
285295 allRows
@@ -289,6 +299,8 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
289299 do ! RunCommand fillAuxiliaryArray binder
290300 }
291301
302+ let auxiliaryArrayLength = auxiliaryArray.Length
303+
292304 let createUnion =
293305 <@
294306 fun ( ndRange : _1D )
@@ -303,7 +315,7 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
303315
304316 let i = ndRange.GlobalID0
305317
306- if auxiliaryArrayBuffer.[ i] = 1 then
318+ if i < auxiliaryArrayLength && auxiliaryArrayBuffer.[ i] = 1 then
307319 let index = prefixSumArrayBuffer.[ i] - 1
308320
309321 resultRowsBuffer.[ index] <- allRowsBuffer.[ i]
@@ -313,13 +325,13 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
313325
314326 let resultRows = Array.zeroCreate allRows.Length
315327 let resultColumns = Array.zeroCreate allColumns.Length
316- let resultValues = Array.init allValues.Length ( fun _ -> zero)
328+ let resultValues = Array.create allValues.Length zero
317329
318330 let createUnion =
319331 opencl {
320- let! prefixSumArray = Toolbox.prefixSum auxiliaryArray
332+ let! prefixSumArray = Toolbox.prefixSum2 auxiliaryArray
321333 let binder kernelP =
322- let ndRange = _ 1D( auxiliaryArray.Length)
334+ let ndRange = _ 1D( workSize auxiliaryArray.Length, workGroupSize )
323335 kernelP
324336 ndRange
325337 allRows
@@ -337,7 +349,6 @@ and COOMatrix<'a when 'a : struct and 'a : equality>(rowCount: int, columnCount:
337349 do ! createSortedConcatenation
338350 do ! filterThroughMask
339351 do ! fillAuxiliaryArray
340- do ! Toolbox.EWiseAdd.dropExplicitZeroes zero allValues auxiliaryArray
341352 do ! createUnion
342353
343354 return upcast COOMatrix< 'a>( this.RowCount, this.ColumnCount, resultRows, resultColumns, resultValues)
@@ -436,11 +447,13 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
436447 }
437448
438449 let allIndices = Array.zeroCreate <| firstIndices.Length + secondIndices.Length
439- let allValues = Array.init ( firstValues.Length + secondValues.Length) ( fun _ -> zero)
450+ let allValues = Array.create ( firstValues.Length + secondValues.Length) zero
440451
441452 let longSide = firstIndices.Length
442453 let shortSide = secondIndices.Length
443454
455+ let allIndicesLength = allIndices.Length
456+
444457 let createSortedConcatenation =
445458 <@
446459 fun ( ndRange : _1D )
@@ -452,33 +465,33 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
452465 ( allValuesBuffer : 'a []) ->
453466
454467 let i = ndRange.GlobalID0
455-
456- let f n = if 0 > n + 1 - shortSide then 0 else n + 1 - shortSide
457- let mutable leftEdge = f i
458- // if 0 > i + 1 - shortSide then 0 else i + 1 - shortSide
459-
460- let g n = if n > longSide - 1 then longSide - 1 else n
461- let mutable rightEdge = g i
462- // if i > longSide - 1 then longSide - 1 else i
463-
464- while leftEdge <= rightEdge do
465- let middleIdx = ( leftEdge + rightEdge ) / 2
466- if firstIndicesBuffer .[ middleIdx ] < secondIndicesBuffer .[ i - middleIdx ] then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
467-
468- let boundaryX , boundaryY = rightEdge , i - leftEdge
469-
470- if boundaryX < 0 || boundaryY >= 0 && firstIndicesBuffer .[ boundaryX ] < secondIndicesBuffer .[ boundaryY ] then
471- allIndicesBuffer.[ i] <- secondIndicesBuffer .[ boundaryY ]
472- allValuesBuffer.[ i] <- secondValuesBuffer.[ boundaryY]
473- else
474- allIndicesBuffer.[ i] <- firstIndicesBuffer .[ boundaryX ]
475- allValuesBuffer.[ i] <- firstValuesBuffer.[ boundaryX]
468+ if i < allIndicesLength then
469+ let f n = if 0 > n + 1 - shortSide then 0 else n + 1 - shortSide
470+ let mutable leftEdge = f i
471+
472+ let g n = if n > longSide - 1 then longSide - 1 else n
473+ let mutable rightEdge = g i
474+
475+ while leftEdge <= rightEdge do
476+ let middleIdx = ( leftEdge + rightEdge ) / 2
477+ if firstIndicesBuffer .[ middleIdx ] < secondIndicesBuffer .[ i - middleIdx ] then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
478+
479+ let boundaryX , boundaryY = rightEdge, i - leftEdge
480+ let firstIndex = firstIndicesBuffer .[ boundaryX ]
481+ let secondIndex = secondIndicesBuffer .[ boundaryY ]
482+
483+ if boundaryX < 0 || boundaryY >= 0 && firstIndex < secondIndex then
484+ allIndicesBuffer.[ i] <- secondIndex
485+ allValuesBuffer.[ i] <- secondValuesBuffer.[ boundaryY]
486+ else
487+ allIndicesBuffer.[ i] <- firstIndex
488+ allValuesBuffer.[ i] <- firstValuesBuffer.[ boundaryX]
476489 @>
477490
478491 let createSortedConcatenation =
479492 opencl {
480493 let binder kernelP =
481- let ndRange = _ 1D( allIndices.Length)
494+ let ndRange = _ 1D( workSize allIndices.Length, workGroupSize )
482495 kernelP
483496 ndRange
484497 firstIndices
@@ -490,7 +503,7 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
490503 do ! RunCommand createSortedConcatenation binder
491504 }
492505
493- let auxiliaryArray = Array.init allIndices.Length ( fun _ -> 1 )
506+ let auxiliaryArray = Array.create allIndices.Length 1
494507
495508 let fillAuxiliaryArray =
496509 <@
@@ -501,15 +514,16 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
501514
502515 let i = ndRange.GlobalID0
503516
504- if allIndicesBuffer.[ i] = allIndicesBuffer.[ i + 1 ] then
517+ if i + 1 < allIndicesLength && allIndicesBuffer.[ i] = allIndicesBuffer.[ i + 1 ] then
505518 auxiliaryArrayBuffer.[ i + 1 ] <- 0
506- allValuesBuffer.[ i] <- (% plus) allValuesBuffer.[ i] allValuesBuffer.[ i + 1 ]
519+ let localResultBuffer = (% plus) allValuesBuffer.[ i] allValuesBuffer.[ i + 1 ]
520+ if localResultBuffer = zero then auxiliaryArrayBuffer.[ i] <- 0 else allValuesBuffer.[ i] <- localResultBuffer
507521 @>
508522
509523 let fillAuxiliaryArray =
510524 opencl {
511525 let binder kernelP =
512- let ndRange = _ 1D( allIndices.Length - 1 )
526+ let ndRange = _ 1D( workSize ( allIndices.Length - 1 ), workGroupSize )
513527 kernelP
514528 ndRange
515529 allIndices
@@ -518,6 +532,8 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
518532 do ! RunCommand fillAuxiliaryArray binder
519533 }
520534
535+ let auxiliaryArrayLength = auxiliaryArray.Length
536+
521537 let createUnion =
522538 <@
523539 fun ( ndRange : _1D )
@@ -530,21 +546,21 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
530546
531547 let i = ndRange.GlobalID0
532548
533- if auxiliaryArrayBuffer.[ i] = 1 then
549+ if i < auxiliaryArrayLength && auxiliaryArrayBuffer.[ i] = 1 then
534550 let index = prefixSumArrayBuffer.[ i] - 1
535551
536552 resultIndicesBuffer.[ index] <- allIndicesBuffer.[ i]
537553 resultValuesBuffer.[ index] <- allValuesBuffer.[ i]
538554 @>
539555
540556 let resultIndices = Array.zeroCreate allIndices.Length
541- let resultValues = Array.init allValues.Length ( fun _ -> zero)
557+ let resultValues = Array.create allValues.Length zero
542558
543559 let createUnion =
544560 opencl {
545- let! prefixSumArray = Toolbox.prefixSum auxiliaryArray
561+ let! prefixSumArray = Toolbox.prefixSum2 auxiliaryArray
546562 let binder kernelP =
547- let ndRange = _ 1D( auxiliaryArray.Length)
563+ let ndRange = _ 1D( workSize auxiliaryArray.Length, workGroupSize )
548564 kernelP
549565 ndRange
550566 allIndices
@@ -560,7 +576,6 @@ and SparseVector<'a when 'a : struct and 'a : equality>(size: int, indices: int[
560576 do ! createSortedConcatenation
561577 do ! filterThroughMask
562578 do ! fillAuxiliaryArray
563- do ! Toolbox.EWiseAdd.dropExplicitZeroes zero allValues auxiliaryArray
564579 do ! createUnion
565580
566581 return upcast SparseVector< 'a>( this.Size, resultIndices, resultValues)
0 commit comments