Skip to content

Commit 4199c68

Browse files
authored
Merge pull request #50 from IgorErin/duplications
Quotes duplications
2 parents 4422318 + 5cd6610 commit 4199c68

15 files changed

Lines changed: 385 additions & 777 deletions

File tree

src/GraphBLAS-sharp.Backend/Common/ClArray.fs

Lines changed: 58 additions & 201 deletions
Original file line numberDiff line numberDiff line change
@@ -127,231 +127,83 @@ module ClArray =
127127

128128
outputArray
129129

130-
let private update (clContext: ClContext) =
131-
132-
let update =
133-
<@ fun (ndRange: Range1D) inputArrayLength bunchLength (resultBuffer: ClArray<int>) (verticesBuffer: ClArray<int>) ->
134-
135-
let i = ndRange.GlobalID0 + bunchLength
136-
137-
if i < inputArrayLength then
138-
resultBuffer.[i] <-
139-
resultBuffer.[i]
140-
+ verticesBuffer.[i / bunchLength] @>
141-
142-
let kernel = clContext.Compile(update)
143-
144-
fun (processor: MailboxProcessor<_>) workGroupSize (inputArray: ClArray<int>) (inputArrayLength: int) (vertices: ClArray<int>) (bunchLength: int) ->
145-
let ndRange =
146-
Range1D.CreateValid(inputArrayLength - bunchLength, workGroupSize)
147-
148-
let kernel = kernel.GetKernel()
149-
150-
processor.Post(
151-
Msg.MsgSetArguments
152-
(fun () -> kernel.KernelFunc ndRange inputArrayLength bunchLength inputArray vertices)
153-
)
154-
155-
processor.Post(Msg.CreateRunMsg<_, _> kernel)
156-
157-
let private scan (clContext: ClContext) workGroupSize =
158-
159-
let scan =
160-
<@ fun (ndRange: Range1D) inputArrayLength verticesLength (resultBuffer: ClArray<int>) (verticesBuffer: ClArray<int>) (totalSumBuffer: ClCell<int>) ->
161-
162-
let resultLocalBuffer = localArray<int> workGroupSize
163-
let i = ndRange.GlobalID0
164-
let localID = ndRange.LocalID0
165-
166-
if i < inputArrayLength then
167-
resultLocalBuffer.[localID] <- resultBuffer.[i]
168-
else
169-
resultLocalBuffer.[localID] <- 0
170-
171-
let mutable step = 2
172-
173-
while step <= workGroupSize do
174-
barrierLocal ()
175-
176-
if localID < workGroupSize / step then
177-
let i = step * (localID + 1) - 1
178-
179-
resultLocalBuffer.[i] <-
180-
resultLocalBuffer.[i]
181-
+ resultLocalBuffer.[i - (step >>> 1)]
182-
183-
step <- step <<< 1
184-
185-
barrierLocal ()
186-
187-
if localID = workGroupSize - 1 then
188-
if verticesLength <= 1 && localID = i then
189-
totalSumBuffer.Value <- resultLocalBuffer.[localID]
190-
191-
verticesBuffer.[i / workGroupSize] <- resultLocalBuffer.[localID]
192-
resultLocalBuffer.[localID] <- 0
193-
194-
step <- workGroupSize
195-
196-
while step > 1 do
197-
barrierLocal ()
198-
199-
if localID < workGroupSize / step then
200-
let i = step * (localID + 1) - 1
201-
let j = i - (step >>> 1)
202-
203-
let tmp = resultLocalBuffer.[i]
204-
resultLocalBuffer.[i] <- resultLocalBuffer.[i] + resultLocalBuffer.[j]
205-
resultLocalBuffer.[j] <- tmp
206-
207-
step <- step >>> 1
208-
209-
barrierLocal ()
210-
211-
if i < inputArrayLength then
212-
resultBuffer.[i] <- resultLocalBuffer.[localID] @>
213-
214-
let kernel = clContext.Compile(scan)
215-
216-
fun (processor: MailboxProcessor<_>) (inputArray: ClArray<int>) (inputArrayLength: int) (vertices: ClArray<int>) (verticesLength: int) (totalSum: ClCell<int>) ->
217-
let ndRange =
218-
Range1D.CreateValid(inputArrayLength, workGroupSize)
219-
220-
let kernel = kernel.GetKernel()
221-
222-
processor.Post(
223-
Msg.MsgSetArguments
224-
(fun () -> kernel.KernelFunc ndRange inputArrayLength verticesLength inputArray vertices totalSum)
225-
)
226-
227-
processor.Post(Msg.CreateRunMsg<_, _> kernel)
228-
229130
/// <summary>
230131
/// Exclude inplace prefix sum.
231132
/// </summary>
232133
/// <example>
233134
/// <code>
234-
/// let arr = [| 1; 2; 3 |]
135+
/// let arr = [| 1; 1; 1; 1 |]
235136
/// let sum = [| 0 |]
236-
/// opencl { do! runExcludeInplace arr sum }
137+
/// runExcludeInplace clContext workGroupSize processor arr sum <@ (+) @> 0
138+
/// |> ignore
237139
/// ...
238-
/// > val arr = [| 0; 1; 3 |]
239-
/// > val sum = [| 6 |]
140+
/// > val arr = [| 0; 1; 2; 3 |]
141+
/// > val sum = [| 4 |]
240142
/// </code>
241143
/// </example>
242144
///<param name="clContext">.</param>
243145
///<param name="workGroupSize">Should be a power of 2 and greater than 1.</param>
244-
let prefixSumExcludeInplace (clContext: ClContext) workGroupSize =
245-
246-
let scan = scan clContext workGroupSize
247-
let update = update clContext
248-
249-
fun (processor: MailboxProcessor<_>) (inputArray: ClArray<int>) (totalSum: ClCell<int>) ->
250-
let firstVertices =
251-
clContext.CreateClArray<int>(
252-
(inputArray.Length - 1) / workGroupSize + 1,
253-
hostAccessMode = HostAccessMode.NotAccessible,
254-
allocationMode = AllocationMode.Default
255-
)
256-
257-
let secondVertices =
258-
clContext.CreateClArray<int>(
259-
(firstVertices.Length - 1) / workGroupSize + 1,
260-
hostAccessMode = HostAccessMode.NotAccessible,
261-
allocationMode = AllocationMode.Default
262-
)
263-
264-
let mutable verticesArrays = firstVertices, secondVertices
265-
let swap (a, b) = (b, a)
266-
let mutable verticesLength = firstVertices.Length
267-
let mutable bunchLength = workGroupSize
268-
269-
scan processor inputArray inputArray.Length (fst verticesArrays) verticesLength totalSum
270-
271-
while verticesLength > 1 do
272-
let fstVertices = fst verticesArrays
273-
let sndVertices = snd verticesArrays
274-
275-
scan
276-
processor
277-
fstVertices
278-
verticesLength
279-
sndVertices
280-
((verticesLength - 1) / workGroupSize + 1)
281-
totalSum
282-
283-
update processor workGroupSize inputArray inputArray.Length fstVertices bunchLength
284-
bunchLength <- bunchLength * workGroupSize
285-
verticesArrays <- swap verticesArrays
286-
verticesLength <- (verticesLength - 1) / workGroupSize + 1
287-
288-
processor.Post(Msg.CreateFreeMsg(firstVertices))
289-
processor.Post(Msg.CreateFreeMsg(secondVertices))
290-
291-
inputArray, totalSum
292-
293-
///<param name="clContext">.</param>
294-
///<param name="workGroupSize">Should be a power of 2 and greater than 1.</param>
295-
let prefixSumExclude (clContext: ClContext) workGroupSize =
296-
297-
let copy = copy clContext workGroupSize
298-
299-
let prefixSumExcludeInplace =
300-
prefixSumExcludeInplace clContext workGroupSize
301-
302-
fun (processor: MailboxProcessor<_>) (inputArray: ClArray<int>) ->
303-
let copiedArray = copy processor inputArray
304-
305-
let totalSum = clContext.CreateClCell 0
306-
prefixSumExcludeInplace processor copiedArray totalSum
146+
///<param name="processor">.</param>
147+
///<param name="inputArray">.</param>
148+
///<param name="totalSum">.</param>
149+
///<param name="plus">Associative binary operation.</param>
150+
///<param name="zero">Zero element for binary operation.</param>
151+
let prefixSumExcludeInplace = PrefixSum.runExcludeInplace
307152

153+
/// <summary>
154+
/// Include inplace prefix sum.
155+
/// </summary>
156+
/// <example>
157+
/// <code>
158+
/// let arr = [| 1; 1; 1; 1 |]
159+
/// let sum = [| 0 |]
160+
/// runExcludeInplace clContext workGroupSize processor arr sum <@ (+) @> 0
161+
/// |> ignore
162+
/// ...
163+
/// > val arr = [| 1; 2; 3; 4 |]
164+
/// > val sum = [| 4 |]
165+
/// </code>
166+
/// </example>
308167
///<param name="clContext">.</param>
309168
///<param name="workGroupSize">Should be a power of 2 and greater than 1.</param>
310-
let prefixSumInclude (clContext: ClContext) workGroupSize =
311-
312-
let kernel =
313-
<@ fun (range: Range1D) (inputArray: ClArray<int>) inputArrayLength (totalSum: ClCell<int>) (outputArray: ClArray<int>) ->
169+
///<param name="processor">.</param>
170+
///<param name="inputArray">.</param>
171+
///<param name="totalSum">.</param>
172+
///<param name="plus">Associative binary operation.</param>
173+
///<param name="zero">Zero element for binary operation.</param>
174+
let prefixSumIncludeInplace = PrefixSum.runIncludeInplace
314175

315-
let gid = range.GlobalID0
176+
let prefixSumExclude plus (clContext: ClContext) workGroupSize =
316177

317-
if gid = inputArrayLength - 1 then
318-
outputArray.[gid] <- totalSum.Value
319-
elif gid < inputArrayLength - 1 then
320-
outputArray.[gid] <- inputArray.[gid + 1] @>
178+
let runExcludeInplace =
179+
prefixSumExcludeInplace plus clContext workGroupSize
321180

322-
let kernel = clContext.Compile(kernel)
323181
let copy = copy clContext workGroupSize
324182

325-
let prefixSumExcludeInplace =
326-
prefixSumExcludeInplace clContext workGroupSize
183+
fun (processor: MailboxProcessor<_>) (inputArray: ClArray<'a>) (totalSum: ClCell<'a>) (zero: 'a) ->
327184

328-
fun (processor: MailboxProcessor<_>) (inputArray: ClArray<'a>) ->
329-
let copiedArray = copy processor inputArray
330-
let inputArrayLength = inputArray.Length
331-
let totalSum = clContext.CreateClCell 0
185+
let outputArray = copy processor inputArray
332186

333-
let _, totalSum =
334-
prefixSumExcludeInplace processor copiedArray totalSum
187+
runExcludeInplace processor outputArray totalSum zero
335188

336-
let outputArray =
337-
clContext.CreateClArray(inputArrayLength, allocationMode = AllocationMode.Default)
189+
let prefixSumInclude plus (clContext: ClContext) workGroupSize =
338190

339-
let ndRange =
340-
Range1D.CreateValid(inputArrayLength, workGroupSize)
191+
let runIncludeInplace =
192+
prefixSumIncludeInplace plus clContext workGroupSize
341193

342-
let kernel = kernel.GetKernel()
194+
let copy = copy clContext workGroupSize
343195

344-
processor.Post(
345-
Msg.MsgSetArguments
346-
(fun () -> kernel.KernelFunc ndRange copiedArray inputArrayLength totalSum outputArray)
347-
)
196+
fun (processor: MailboxProcessor<_>) (inputArray: ClArray<'a>) (totalSum: ClCell<'a>) (zero: 'a) ->
348197

349-
processor.Post(Msg.CreateRunMsg<_, _> kernel)
198+
let outputArray = copy processor inputArray
350199

351-
processor.Post(Msg.CreateFreeMsg(copiedArray))
200+
runIncludeInplace processor outputArray totalSum zero
352201

353-
outputArray, totalSum
202+
let prefixSumBackwardsExcludeInplace plus =
203+
PrefixSum.runBackwardsExcludeInplace plus
354204

205+
let prefixSumBackwardsIncludeInplace plus =
206+
PrefixSum.runBackwardsIncludeInplace plus
355207

356208
let getUniqueBitmap (clContext: ClContext) =
357209

@@ -391,7 +243,6 @@ module ClArray =
391243

392244
bitmap
393245

394-
395246
let setPositions (clContext: ClContext) =
396247

397248
let setPositions =
@@ -431,20 +282,26 @@ module ClArray =
431282

432283
let setPositions = setPositions clContext
433284
let getUniqueBitmap = getUniqueBitmap clContext
434-
let prefixSumExclude = prefixSumExclude clContext workGroupSize
285+
286+
let prefixSumExclude =
287+
prefixSumExclude <@ (+) @> clContext workGroupSize
435288

436289
fun (processor: MailboxProcessor<_>) (inputArray: ClArray<'a>) ->
437290

438291
let bitmap =
439292
getUniqueBitmap processor workGroupSize inputArray
440293

441-
let (positions, sum) = prefixSumExclude processor bitmap
294+
let sum = clContext.CreateClCell 0
295+
296+
let positions, sum = prefixSumExclude processor bitmap sum 0
442297

443298
let resultLength =
444299
let a = [| 0 |]
445300

446-
let _ =
447-
processor.PostAndReply(fun ch -> Msg.CreateToHostMsg(sum, a, ch))
301+
processor.PostAndReply(fun ch -> Msg.CreateToHostMsg(sum, a, ch))
302+
|> ignore
303+
304+
processor.Post(Msg.CreateFreeMsg<_>(sum))
448305

449306
a.[0]
450307

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
namespace GraphBLAS.FSharp.Backend
2+
3+
open Brahma.FSharp
4+
5+
module SubSum =
6+
let private treeAccess<'a> opAdd =
7+
<@ fun step lid wgSize (localBuffer: 'a []) ->
8+
let i = step * (lid + 1) - 1
9+
10+
let firstValue = localBuffer.[i - (step >>> 1)]
11+
let secondValue = localBuffer.[i]
12+
13+
localBuffer.[i] <- (%opAdd) firstValue secondValue @>
14+
15+
let private sequentialAccess<'a> opAdd =
16+
<@ fun step lid wgSize (localBuffer: 'a []) ->
17+
let firstValue = localBuffer.[lid]
18+
let secondValue = localBuffer.[lid + wgSize / step]
19+
20+
localBuffer.[lid] <- (%opAdd) firstValue secondValue @>
21+
22+
let private sumGeneral<'a> memoryAccess =
23+
<@ fun wgSize lid (localBuffer: 'a []) ->
24+
let mutable step = 2
25+
26+
while step <= wgSize do
27+
if lid < wgSize / step then
28+
(%memoryAccess) step lid wgSize localBuffer
29+
30+
step <- step <<< 1
31+
32+
barrierLocal () @>
33+
34+
let sequentialSum<'a> opAdd =
35+
sumGeneral<'a> <| sequentialAccess<'a> opAdd
36+
37+
let treeSum<'a> opAdd = sumGeneral<'a> <| treeAccess<'a> opAdd
38+
39+
module PreparePositions =
40+
let both<'c> =
41+
<@ fun index (result: 'c option) (rawPositionsBuffer: ClArray<int>) (allValuesBuffer: ClArray<'c>) ->
42+
rawPositionsBuffer.[index] <- 0
43+
44+
match result with
45+
| Some v ->
46+
allValuesBuffer.[index + 1] <- v
47+
rawPositionsBuffer.[index + 1] <- 1
48+
| None -> rawPositionsBuffer.[index + 1] <- 0 @>
49+
50+
let leftRight<'c> =
51+
<@ fun index (leftResult: 'c option) (rightResult: 'c option) (isLeftBitmap: ClArray<int>) (allValuesBuffer: ClArray<'c>) (rawPositionsBuffer: ClArray<int>) ->
52+
if isLeftBitmap.[index] = 1 then
53+
match leftResult with
54+
| Some v ->
55+
allValuesBuffer.[index] <- v
56+
rawPositionsBuffer.[index] <- 1
57+
| None -> rawPositionsBuffer.[index] <- 0
58+
else
59+
match rightResult with
60+
| Some v ->
61+
allValuesBuffer.[index] <- v
62+
rawPositionsBuffer.[index] <- 1
63+
| None -> rawPositionsBuffer.[index] <- 0 @>

0 commit comments

Comments
 (0)