Skip to content

Commit 3fc1b44

Browse files
committed
add: Search module
1 parent 5409b2e commit 3fc1b44

12 files changed

Lines changed: 329 additions & 58 deletions

File tree

src/GraphBLAS-sharp.Backend/Common/Sum.fs

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ open GraphBLAS.FSharp.Backend.Quotes
55
open Microsoft.FSharp.Control
66
open Microsoft.FSharp.Quotations
77
open GraphBLAS.FSharp.Backend.Objects.ClContext
8+
open GraphBLAS.FSharp.Backend.Objects.ClCell
9+
open GraphBLAS.FSharp.Backend.Objects.ArraysExtensions
810

911
module Reduce =
1012
let private runGeneral (clContext: ClContext) workGroupSize scan scanToCell =
@@ -235,3 +237,166 @@ module Reduce =
235237
runGeneral clContext workGroupSize scan scanToCell
236238

237239
fun (processor: MailboxProcessor<_>) (array: ClArray<'a>) -> run processor array
240+
241+
module ByKey =
242+
/// <summary>
/// Reduces values by key with a single work item that walks the whole input sequentially.
/// Consecutive equal keys form one segment; each segment is folded with <c>reduceOp</c>.
/// </summary>
/// <param name="clContext">Context used to compile the kernel and allocate the result buffers.</param>
/// <param name="workGroupSize">Work group size used to build the launch range.</param>
/// <param name="reduceOp">Binary operation used to combine the values of one segment.</param>
/// <remarks>
/// The returned function takes the number of segments (<c>resultLength</c>) from the caller;
/// it is used to size the result buffers, so it must not be smaller than the real number
/// of segments in <c>keys</c>.
/// NOTE(review): the result buffers are allocated here but are not returned to the caller —
/// the function evaluates to the result of the last <c>processor.Post</c>. Confirm the intended
/// return value before relying on this function.
/// </remarks>
let sequential (clContext: ClContext) workGroupSize (reduceOp: Expr<'a -> 'a -> 'a>) =

    let kernel =
        <@ fun (ndRange: Range1D) length (keys: ClArray<int>) (values: ClArray<'a>) (reducedValues: ClArray<'a>) (reducedKeys: ClArray<int>) ->

            let gid = ndRange.GlobalID0

            // Only work item 0 does anything: the scan below is strictly sequential.
            if gid = 0 then
                let mutable currentKey = keys.[gid]
                let mutable segmentResult = values.[gid]
                let mutable segmentCount = 0

                for i in 1 .. length - 1 do
                    if currentKey = keys.[i] then
                        // still inside the current segment
                        segmentResult <- (%reduceOp) segmentResult values.[i]
                    else
                        // key changed: flush the finished segment and start a new one
                        reducedValues.[segmentCount] <- segmentResult
                        reducedKeys.[segmentCount] <- currentKey

                        segmentCount <- segmentCount + 1
                        currentKey <- keys.[i]
                        segmentResult <- values.[i]

                // flush the last segment
                reducedKeys.[segmentCount] <- currentKey
                reducedValues.[segmentCount] <- segmentResult @>

    let kernel = clContext.Compile kernel

    fun (processor: MailboxProcessor<_>) allocationMode (resultLength: int) (keys: ClArray<int>) (values: ClArray<'a>) ->

        let reducedValues = clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)

        let reducedKeys = clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)

        let ndRange = Range1D.CreateValid(resultLength, workGroupSize)

        let kernel = kernel.GetKernel()

        // The kernel iterates over the INPUT, so it needs the number of keys,
        // not the number of segments.
        let keysLength = keys.Length

        processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange keysLength keys values reducedValues reducedKeys))

        processor.Post(Msg.CreateRunMsg<_, _>(kernel))
284+
/// <summary>
/// Reduces values by key with one work item per segment: work item <c>gid</c> starts at
/// <c>offsets.[gid]</c> and folds values with <c>reduceOp</c> while the key stays the same.
/// </summary>
/// <param name="clContext">Context used to compile the kernel and allocate the result buffers.</param>
/// <param name="workGroupSize">Work group size used by the kernel launch and the helper primitives.</param>
/// <param name="reduceOp">Binary operation used to combine the values of one segment.</param>
/// <remarks>
/// NOTE(review): <c>offsets</c> is produced by running <c>removeDuplicates</c> over the bitmap
/// after the in-place prefix sum. Confirm that this yields the start position of every segment;
/// the kernel treats each element of <c>offsets</c> as an index into <c>keys</c>.
/// NOTE(review): the result buffers are allocated here but are not returned to the caller —
/// the function evaluates to the result of the last <c>processor.Post</c>.
/// </remarks>
let segmentSequential (clContext: ClContext) workGroupSize (reduceOp: Expr<'a -> 'a -> 'a>) =

    let kernel =
        <@ fun (ndRange: Range1D) uniqueKeyCount keysLength (offsets: ClArray<int>) (keys: ClArray<int>) (values: ClArray<'a>) (reducedValues: ClArray<'a>) (reducedKeys: ClArray<int>) ->

            let gid = ndRange.GlobalID0

            if gid < uniqueKeyCount then
                let startPosition = offsets.[gid]
                let sourceKey = keys.[startPosition]

                let mutable sum = values.[startPosition]
                let mutable nextPosition = startPosition + 1

                // The bound check comes first so the last segment never reads past the end of keys.
                while nextPosition < keysLength && keys.[nextPosition] = sourceKey do
                    sum <- (%reduceOp) sum values.[nextPosition]
                    nextPosition <- nextPosition + 1

                reducedValues.[gid] <- sum
                reducedKeys.[gid] <- sourceKey @>

    let kernel = clContext.Compile kernel

    let getUniqueBitmap = ClArray.getUniqueBitmap clContext workGroupSize

    let prefixSum = PrefixSum.runExcludeInplace <@ (+) @> clContext workGroupSize

    let removeDuplicates = ClArray.removeDuplications clContext workGroupSize

    fun (processor: MailboxProcessor<_>) allocationMode (keys: ClArray<int>) (values: ClArray<'a>) ->

        let bitmap = getUniqueBitmap processor DeviceOnly keys

        // The total returned by the in-place scan is used as the number of segments.
        let resultLength = (prefixSum processor bitmap 0).ToHostAndFree processor

        let offsets = removeDuplicates processor bitmap

        bitmap.Free processor

        let reducedValues = clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)

        let reducedKeys = clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)

        let ndRange = Range1D.CreateValid(resultLength, workGroupSize)

        let kernel = kernel.GetKernel()

        let keysLength = keys.Length

        processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange resultLength keysLength offsets keys values reducedValues reducedKeys))

        processor.Post(Msg.CreateRunMsg<_, _>(kernel))

        // offsets is a temporary; the free message is queued after the run message.
        offsets.Free processor
338+
/// <summary>
/// Reduces values by key inside a single work group: keys and values are staged in local
/// memory, segment ids are computed with a local prefix sum over the "segment start" bitmap,
/// and work item <c>i</c> reduces the <c>i</c>-th segment.
/// </summary>
/// <param name="clContext">Context used to compile the kernel and allocate the result buffers.</param>
/// <param name="workGroupSize">Work group size; also the size of the local staging arrays.</param>
/// <param name="reduceOp">Binary operation used to combine the values of one segment.</param>
/// <remarks>
/// The kernel indexes the local arrays by the global id, so the whole input must fit into one
/// work group (<c>keys.Length &lt;= workGroupSize</c>).
/// NOTE(review): the result buffers are allocated here but are not returned to the caller —
/// the function evaluates to the result of the last <c>processor.Post</c>.
/// </remarks>
let oneWorkGroupSegments (clContext: ClContext) workGroupSize (reduceOp: Expr<'a -> 'a -> 'a>) =

    let kernel =
        <@ fun (ndRange: Range1D) length (keys: ClArray<int>) (values: ClArray<'a>) (reducedValues: ClArray<'a>) (reducedKeys: ClArray<int>) ->

            let lid = ndRange.GlobalID0

            // Local arrays are sized by the work group size rather than by a kernel argument.
            let localValues = localArray<'a> workGroupSize
            let localKeys = localArray<int> workGroupSize
            let localBitmap = localArray<int> workGroupSize

            // load values to local memory (may be without it)
            if lid < length then localValues.[lid] <- values.[lid]

            // load keys to local memory (may be without it)
            if lid < length then localKeys.[lid] <- keys.[lid]

            // items past the end of the input contribute nothing to the scan
            localBitmap.[lid] <- 0

            // neighbours' keys must be visible before they are compared
            barrierLocal ()

            // get unique keys bitmap
            (%PreparePositions.getUniqueBitmapLocal<int>) localKeys length lid localBitmap

            // get positions from bitmap by prefix sum
            // ??? get bitmap by prefix sum in another kernel ???
            (%SubSum.localIntPrefixSum) lid workGroupSize localBitmap

            // the scan ends with a write; synchronize before reading other items' results
            barrierLocal ()

            let localPositions = localBitmap

            let uniqueKeysCount = localPositions.[length - 1]

            if lid < uniqueKeysCount then
                let itemKeyId = lid + 1
                // we can count start position by itemKeyId
                // but lose coalesced memory read pattern

                let startKeyIndex =
                    (%Search.Bin.lowerPosition) length itemKeyId localPositions

                match startKeyIndex with
                | Some startPosition ->
                    let sourcePosition = localPositions.[startPosition]
                    let mutable currentSum = localValues.[startPosition]
                    let mutable currentIndex = startPosition + 1

                    while currentIndex < length
                          && localPositions.[currentIndex] = sourcePosition do

                        currentSum <- (%reduceOp) currentSum localValues.[currentIndex]
                        currentIndex <- currentIndex + 1

                    reducedKeys.[lid] <- localKeys.[startPosition]
                    reducedValues.[lid] <- currentSum
                | None -> () @>

    let kernel = clContext.Compile kernel

    fun (processor: MailboxProcessor<_>) allocationMode (resultLength: int) (keys: ClArray<int>) (values: ClArray<'a>) ->

        let reducedValues = clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)

        let reducedKeys = clContext.CreateClArrayWithSpecificAllocationMode(allocationMode, resultLength)

        // One work item per INPUT element: the kernel stages and scans the whole input.
        let keysLength = keys.Length

        let ndRange = Range1D.CreateValid(keysLength, workGroupSize)

        let kernel = kernel.GetKernel()

        processor.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange keysLength keys values reducedValues reducedKeys))

        processor.Post(Msg.CreateRunMsg<_, _>(kernel))

src/GraphBLAS-sharp.Backend/GraphBLAS-sharp.Backend.fsproj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@
2626
<Compile Include="Quotes/PreparePositions.fs" />
2727
<Compile Include="Quotes/Predicates.fs" />
2828
<Compile Include="Quotes/Map.fs" />
29+
<Compile Include="Quotes\Search.fs" />
2930
<Compile Include="Common/Scatter.fs" />
3031
<Compile Include="Common/Utils.fs" />
31-
<Compile Include="Common/Sum.fs" />
3232
<Compile Include="Common/PrefixSum.fs" />
3333
<Compile Include="Common/ClArray.fs" />
34+
<Compile Include="Common\Sum.fs" />
3435
<Compile Include="Common/BitonicSort.fs" />
3536
<Compile Include="Predefined/PrefixSum.fs" />
3637
<!--Compile Include="Matrices.fs" /-->

src/GraphBLAS-sharp.Backend/Matrix/COOMatrix/Map2.fs

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,33 +7,9 @@ open GraphBLAS.FSharp.Backend.Objects
77
open GraphBLAS.FSharp.Backend
88
open GraphBLAS.FSharp.Backend.Objects.ClMatrix
99
open GraphBLAS.FSharp.Backend.Objects.ClContext
10+
open GraphBLAS.FSharp.Backend.Quotes
1011

1112
module internal Map2 =
12-
let binSearch<'a> =
13-
<@ fun lenght sourceIndex (rowIndices: ClArray<int>) (columnIndices: ClArray<int>) (values: ClArray<'a>) ->
14-
15-
let mutable leftEdge = 0
16-
let mutable rightEdge = lenght - 1
17-
18-
let mutable result = None
19-
20-
while leftEdge <= rightEdge do
21-
let middleIdx = (leftEdge + rightEdge) / 2
22-
23-
let currentIndex: uint64 =
24-
((uint64 rowIndices.[middleIdx]) <<< 32)
25-
||| (uint64 columnIndices.[middleIdx])
26-
27-
if sourceIndex = currentIndex then
28-
result <- Some values.[middleIdx]
29-
30-
rightEdge <- -1 // TODO() break
31-
elif sourceIndex < currentIndex then
32-
rightEdge <- middleIdx - 1
33-
else
34-
leftEdge <- middleIdx + 1
35-
36-
result @>
3713

3814
let preparePositions<'a, 'b, 'c> (clContext: ClContext) workGroupSize opAdd =
3915

@@ -51,10 +27,10 @@ module internal Map2 =
5127
(uint64 rowIndex <<< 32) ||| (uint64 columnIndex)
5228

5329
let leftValue =
54-
(%binSearch) leftValuesLength index leftRows leftColumns leftValues
30+
(%Search.Bin.byKey2) leftValuesLength index leftRows leftColumns leftValues
5531

5632
let rightValue =
57-
(%binSearch) rightValuesLength index rightRows rightColumn rightValues
33+
(%Search.Bin.byKey2) rightValuesLength index rightRows rightColumn rightValues
5834

5935
match (%op) leftValue rightValue with
6036
| Some value ->

src/GraphBLAS-sharp.Backend/Objects/ArraysExtentions.fs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@ module ArraysExtensions =
1313
let dst = Array.zeroCreate this.Length
1414
q.PostAndReply(fun ch -> Msg.CreateToHostMsg(this, dst, ch))
1515

16+
/// Posts a free message for this array to the given queue.
member this.Free(q: MailboxProcessor<_>) = q.Post(Msg.CreateFreeMsg this)
17+
18+
/// Copies this array to the host via ToHost, then posts a free message for it,
/// and returns the host copy.
member this.ToHostAndFree(q: MailboxProcessor<_>) =
    let hostCopy = this.ToHost q

    this.Free q
    hostCopy
23+
1624
member this.Size = this.Length
1725

1826
type 'a ``[]`` with

src/GraphBLAS-sharp.Backend/Quotes/PreparePositions.fs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,12 @@ module PreparePositions =
2727
allValuesBuffer.[index] <- v
2828
rawPositionsBuffer.[index] <- 1
2929
| None -> rawPositionsBuffer.[index] <- 0 @>
30+
31+
/// <summary>
/// For every position below <c>length</c> writes 1 to <c>result</c> when the item is the first
/// one or differs from its left neighbour, and 0 otherwise. Positions at or past
/// <c>length</c> are left untouched.
/// </summary>
let getUniqueBitmapLocal<'a when 'a : equality> =
    <@ fun (array: 'a []) length lid (result: int []) ->
        if lid < length then
            // the left neighbour is only inspected when it exists
            let startsNewRun = lid = 0 || (lid > 0 && array.[lid] <> array.[lid - 1])

            if startsNewRun then
                result.[lid] <- 1
            else
                result.[lid] <- 0 @>
38+
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
namespace GraphBLAS.FSharp.Backend.Quotes
2+
3+
open Brahma.FSharp
4+
5+
module Search =
6+
module Bin =
7+
/// <summary>
/// Binary search over <c>indices</c>: returns <c>Some values.[i]</c> for a position <c>i</c>
/// where <c>indices.[i] = sourceIndex</c>, or <c>None</c> when no such position is found.
/// </summary>
/// <remarks>Assumes <c>indices</c> is sorted in ascending order within the first <c>length</c> items.</remarks>
let byKey<'a> =
    <@ fun length sourceIndex (indices: ClArray<int>) (values: ClArray<'a>) ->

        let mutable leftEdge = 0
        let mutable rightEdge = length - 1

        let mutable result = None

        while leftEdge <= rightEdge do
            // overflow-safe midpoint: leftEdge + rightEdge may not fit into int
            let middleIdx = leftEdge + (rightEdge - leftEdge) / 2
            let currentIndex = indices.[middleIdx]

            if sourceIndex = currentIndex then
                result <- Some values.[middleIdx]

                // makes the loop condition false, i.e. acts as a break
                rightEdge <- -1
            elif sourceIndex < currentIndex then
                rightEdge <- middleIdx - 1
            else
                leftEdge <- middleIdx + 1

        result @>
29+
30+
/// <summary>
/// Binary search by a composite 64-bit key: the row index is placed in the high 32 bits and
/// the column index in the low 32 bits. Returns <c>Some values.[i]</c> for a position whose
/// packed (row, column) equals <c>sourceIndex</c>, or <c>None</c> when there is none.
/// </summary>
/// <remarks>Assumes the (row, column) pairs are sorted in ascending order of the packed key.</remarks>
let byKey2<'a> =
    <@ fun length sourceIndex (rowIndices: ClArray<int>) (columnIndices: ClArray<int>) (values: ClArray<'a>) ->

        let mutable leftEdge = 0
        let mutable rightEdge = length - 1

        let mutable result = None

        while leftEdge <= rightEdge do
            // overflow-safe midpoint: leftEdge + rightEdge may not fit into int
            let middleIdx = leftEdge + (rightEdge - leftEdge) / 2

            let currentIndex: uint64 =
                ((uint64 rowIndices.[middleIdx]) <<< 32)
                ||| (uint64 columnIndices.[middleIdx])

            if sourceIndex = currentIndex then
                result <- Some values.[middleIdx]

                // makes the loop condition false, i.e. acts as a break
                rightEdge <- -1
            elif sourceIndex < currentIndex then
                rightEdge <- middleIdx - 1
            else
                leftEdge <- middleIdx + 1

        result @>
55+
56+
/// <summary>
/// Finds the lowest position of an item in an array: returns <c>Some i</c> for the smallest
/// <c>i</c> with <c>keys.[i] = sourceItem</c>, or <c>None</c> when the item is not found.
/// </summary>
/// <remarks>Assumes the first <c>length</c> items of <c>keys</c> are sorted in ascending order.</remarks>
let lowerPosition<'a when 'a : equality and 'a: comparison> =
    <@ fun length sourceItem (keys: 'a []) ->

        let mutable leftEdge = 0
        let mutable rightEdge = length - 1
        let mutable resultPosition = None

        while leftEdge <= rightEdge do
            // overflow-safe midpoint: leftEdge + rightEdge may not fit into int
            let currentPosition = leftEdge + (rightEdge - leftEdge) / 2
            let currentKey = keys.[currentPosition]

            if sourceItem = currentKey then
                // remember the position and keep searching to the left
                resultPosition <- Some currentPosition

                rightEdge <- currentPosition - 1
            elif sourceItem < currentKey then
                rightEdge <- currentPosition - 1
            else
                leftEdge <- currentPosition + 1

        resultPosition @>

src/GraphBLAS-sharp.Backend/Quotes/SubSum.fs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ open Brahma.FSharp
44

55
module SubSum =
66
let private treeAccess<'a> opAdd =
7-
<@ fun step lid wgSize (localBuffer: 'a []) ->
7+
<@ fun step lid _ (localBuffer: 'a []) ->
88
let i = step * (lid + 1) - 1
99

1010
let firstValue = localBuffer.[i - (step >>> 1)]
@@ -35,3 +35,21 @@ module SubSum =
3535
sumGeneral<'a> <| sequentialAccess<'a> opAdd
3636

3737
let treeSum<'a> opAdd = sumGeneral<'a> <| treeAccess<'a> opAdd
38+
39+
/// <summary>
/// In-place inclusive scan of <c>array</c> with <c>opAdd</c>, one item per work item.
/// On every step each item combines its value with the one <c>stride</c> positions to the left,
/// and the stride doubles until it reaches <c>workGroupSize</c>.
/// </summary>
/// <remarks>
/// Each step synchronizes before reading and again before writing. There is no barrier after
/// the final write, so a caller that reads other items' results has to synchronize itself.
/// </remarks>
let localPrefixSum opAdd =
    <@ fun (lid: int) (workGroupSize: int) (array: 'a []) ->
        let mutable stride = 1

        while stride < workGroupSize do
            // wait until the writes of the previous step are visible
            barrierLocal ()
            let mutable accumulated = array.[lid]

            if lid >= stride then
                accumulated <- (%opAdd) accumulated array.[lid - stride]

            stride <- stride * 2

            // everyone has read its inputs; now it is safe to overwrite them
            barrierLocal ()
            array.[lid] <- accumulated @>
54+
55+
/// localPrefixSum specialised with (+) as the combining operation.
let localIntPrefixSum = localPrefixSum <@ (+) @>

0 commit comments

Comments
 (0)