@@ -3,31 +3,25 @@ namespace GraphBLAS.FSharp.Backend.Common
33open Brahma.OpenCL
44open Brahma.FSharp .OpenCL .WorkflowBuilder .Basic
55open Brahma.FSharp .OpenCL .WorkflowBuilder .Evaluation
6- open Utils
76
87// functions in module could be named run\get\if\it\t
98// like mentioned here https://www.reddit.com/r/fsharp/comments/5kvsyk/modules_or_namespaces/dbt0zf7?utm_source=share&utm_medium=web2x&context=3
10- module internal Scan =
11- // Changes inputArray
12- let run ( inputArray : int []) ( totalSum : int []) =
13- let outputArray = inputArray
14- let outputArrayLength = outputArray.Length
15- let workGroupSize = workGroupSize
9+ module internal PrefixSum =
10+ let scan ( inputArray : int []) ( inputArrayLength : int ) ( vertices : int []) ( verticesLength : int ) ( totalSum : int []) : OpenCLEvaluation < unit > = opencl {
11+ let workGroupSize = Utils.workGroupSize
1612
1713 let scan =
1814 <@
1915 fun ( ndRange : _1D )
2016 ( resultBuffer : int [])
21- ( resultLength : int )
2217 ( verticesBuffer : int [])
23- ( verticesLength : int )
2418 ( totalSumBuffer : int []) ->
2519
2620 let resultLocalBuffer = localArray< int> workGroupSize
2721 let i = ndRange.GlobalID0
2822 let localID = ndRange.LocalID0
2923
30- if i < resultLength then resultLocalBuffer.[ localID] <- resultBuffer.[ i] else resultLocalBuffer.[ localID] <- 0
24+ if i < inputArrayLength then resultLocalBuffer.[ localID] <- resultBuffer.[ i] else resultLocalBuffer.[ localID] <- 0
3125
3226 let mutable step = 2
3327 while step <= workGroupSize do
@@ -56,71 +50,63 @@ module internal Scan =
5650 step <- step >>> 1
5751 barrier ()
5852
59- if i < resultLength then resultBuffer.[ i] <- resultLocalBuffer.[ localID]
53+ if i < inputArrayLength then resultBuffer.[ i] <- resultLocalBuffer.[ localID]
6054 @>
6155
62- let scan array length vertices verticesLength =
63- opencl {
64- let binder kernelP =
65- let ndRange = _ 1D( workSize length, workGroupSize)
66- kernelP
67- ndRange
68- array
69- length
70- vertices
71- verticesLength
72- totalSum
73- do ! RunCommand scan binder
74- }
56+ do ! RunCommand scan <| fun kernelPrepare ->
57+ let ndRange = _ 1D( Utils.workSize inputArrayLength, workGroupSize)
58+ kernelPrepare
59+ ndRange
60+ inputArray
61+ vertices
62+ totalSum
63+ }
64+
65+ let update ( inputArray : int []) ( inputArrayLength : int ) ( vertices : int []) ( bunchLength : int ) : OpenCLEvaluation < unit > = opencl {
66+ let workGroupSize = Utils.workGroupSize
7567
7668 let update =
7769 <@
7870 fun ( ndRange : _1D )
7971 ( resultBuffer : int [])
80- ( resultLength : int )
81- ( verticesBuffer : int [])
82- ( bunchLength : int ) ->
72+ ( verticesBuffer : int []) ->
8373
8474 let i = ndRange.GlobalID0 + bunchLength
85- if i < resultLength then
75+ if i < inputArrayLength then
8676 resultBuffer.[ i] <- resultBuffer.[ i] + verticesBuffer.[ i / bunchLength]
8777 @>
8878
89- let update vertices depth =
90- opencl {
91- let binder kernelP =
92- let ndRange = _ 1D( workSize outputArrayLength - depth, workGroupSize)
93- kernelP
94- ndRange
95- outputArray
96- outputArrayLength
97- vertices
98- depth
99- do ! RunCommand update binder
100- }
101-
102- let firstVertices = Array.zeroCreate <| ( outputArrayLength - 1 ) / workGroupSize + 1
79+ do ! RunCommand update <| fun kernelPrepare ->
80+ let ndRange = _ 1D( Utils.workSize inputArrayLength - bunchLength, workGroupSize)
81+ kernelPrepare
82+ ndRange
83+ inputArray
84+ vertices
85+ }
86+
87+ // Changes received arrays
88+ let run ( inputArray : int []) ( totalSum : int []) = opencl {
89+ let workGroupSize = Utils.workGroupSize
90+
91+ let firstVertices = Array.zeroCreate <| ( inputArray.Length - 1 ) / workGroupSize + 1
10392 let secondVertices = Array.zeroCreate <| ( firstVertices.Length - 1 ) / workGroupSize + 1
10493 let mutable verticesArrays = firstVertices, secondVertices
10594 let swap ( a , b ) = ( b, a)
10695
107- opencl {
108- let mutable verticesLength = ( outputArrayLength - 1 ) / workGroupSize + 1
109- let mutable bunchLength = workGroupSize
110-
111- do ! scan outputArray outputArrayLength <| fst verticesArrays <| verticesLength
112- while verticesLength > 1 do
113- let fstVertices = fst verticesArrays
114- let sndVertices = snd verticesArrays
115- do ! scan fstVertices verticesLength sndVertices (( verticesLength - 1 ) / workGroupSize + 1 )
116- do ! update fstVertices bunchLength
96+ let mutable verticesLength = ( inputArray.Length - 1 ) / workGroupSize + 1
97+ let mutable bunchLength = workGroupSize
11798
118- bunchLength <- bunchLength * workGroupSize
119- verticesArrays <- swap verticesArrays
120- verticesLength <- ( verticesLength - 1 ) / workGroupSize + 1
99+ do ! scan inputArray inputArray.Length ( fst verticesArrays) verticesLength totalSum
100+ while verticesLength > 1 do
101+ let fstVertices = fst verticesArrays
102+ let sndVertices = snd verticesArrays
103+ do ! scan fstVertices verticesLength sndVertices (( verticesLength - 1 ) / workGroupSize + 1 ) totalSum
104+ do ! update inputArray inputArray.Length fstVertices bunchLength
121105
122- return outputArray, totalSum
123- }
106+ bunchLength <- bunchLength * workGroupSize
107+ verticesArrays <- swap verticesArrays
108+ verticesLength <- ( verticesLength - 1 ) / workGroupSize + 1
109+ }
124110
125111 // let rec v1 (inputArray: int[]) =
126112 // let outputArray = Array.zeroCreate inputArray.Length
0 commit comments