Skip to content

Commit 66f116d

Browse files
authored
Merge pull request #16 from artemgl/coo-element-wise-addition-optimization
Element-wise addition fix and refactoring
2 parents 77f4be6 + e699ca1 commit 66f116d

15 files changed

Lines changed: 774 additions & 492 deletions

File tree

.vscode/settings.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@
33
"FSharp.enableAnalyzers": true,
44
"FSharp.analyzersPath": [
55
"./packages/analyzers"
6-
]
6+
],
7+
"FSharp.suggestSdkScripts": false
78
}

benchmarks/GraphBLAS-sharp.Benchmarks/Configs/EWiseAddBenchmarks4Float32.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
arc130.mtx
2-
!linux_call_graph.mtx
3-
!webbase-1M.mtx
2+
linux_call_graph.mtx
3+
webbase-1M.mtx
44
!cit-Patents.mtx
55
!wikipedia-20061104.mtx
66
!wb-edu.mtx

src/GraphBLAS-sharp/Abstracts.fs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ type MatrixTuples<'a when 'a : struct and 'a : equality> = {
1010
}
1111
with
1212
member this.ToHost() = opencl {
13-
let! rows = ToHost this.RowIndices
14-
let! cols = ToHost this.ColumnIndices
15-
let! vals = ToHost this.Values
13+
let! rows = if this.RowIndices.Length = 0 then opencl { return [||] } else ToHost this.RowIndices
14+
let! cols = if this.ColumnIndices.Length = 0 then opencl { return [||] } else ToHost this.ColumnIndices
15+
let! vals = if this.Values.Length = 0 then opencl { return [||] } else ToHost this.Values
1616

1717
return {
1818
RowIndices = rows
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
namespace GraphBLAS.FSharp.Backend.Common
2+
3+
open Brahma.OpenCL
4+
open Brahma.FSharp.OpenCL.WorkflowBuilder.Basic
5+
open Utils
6+
7+
module internal Copy =
8+
// Copies inputArray element by element into a newly created array of the same length
// by running the `copy` kernel through RunCommand, and returns that new array.
// `run` (defined after this function) handles the empty-array case before delegating here.
let runNotEmpty (inputArray: 'a[]) = opencl {
9+
let inputArrayLength = inputArray.Length
10+
let copy =
11+
<@
12+
fun (ndRange: _1D)
13+
(inputArrayBuffer: 'a[])
14+
(outputArrayBuffer: 'a[]) ->
15+
16+
let i = ndRange.GlobalID0
// Bounds guard: work items whose global id is at or past the array length write nothing.
17+
if i < inputArrayLength then
18+
outputArrayBuffer.[i] <- inputArrayBuffer.[i]
19+
@>
20+
21+
let outputArray = Array.zeroCreate inputArray.Length
22+
// NOTE(review): workSize / workGroupSize come from the opened Utils module; presumably
// workSize rounds the length up to a multiple of workGroupSize, which is why the kernel
// needs the bounds guard above — confirm against Utils.
23+
do! RunCommand copy <| fun kernelPrepare ->
24+
let ndRange = _1D(workSize inputArray.Length, workGroupSize)
25+
kernelPrepare ndRange inputArray outputArray
26+
return outputArray
27+
}
28+
29+
// Entry point for copying: an empty input yields an empty array without launching a kernel;
// any other input is delegated to runNotEmpty.
let run (inputArray: 'a[]) = if inputArray.Length = 0 then opencl { return [||] } else runNotEmpty inputArray
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
namespace GraphBLAS.FSharp.Backend.Common
2+
3+
open Brahma.OpenCL
4+
open Brahma.FSharp.OpenCL.WorkflowBuilder.Basic
5+
open Brahma.FSharp.OpenCL.WorkflowBuilder.Evaluation
6+
7+
// functions in a module could be named run\get\if\it\t
8+
// like mentioned here https://www.reddit.com/r/fsharp/comments/5kvsyk/modules_or_namespaces/dbt0zf7?utm_source=share&utm_medium=web2x&context=3
9+
module internal PrefixSum =
10+
// Runs one kernel that computes, independently inside every work group, an exclusive prefix sum
// of the first inputArrayLength elements of inputArray, writing the result back in place.
//   inputArray / inputArrayLength - buffer to scan and the number of valid elements in it.
//   vertices                      - receives one value per work group: that group's total.
//   verticesLength                - when <= 1, the group total is also stored in totalSum.[0].
//   totalSum                      - buffer whose element 0 receives the overall total (see above).
// NOTE(review): the step-doubling / step-halving loops assume workGroupSize is a power of two — confirm in Utils.
// NOTE(review): original indentation is not visible in this view, so the exact nesting of the
// statements following each `if` / `while` should be checked against the real file.
let scan (inputArray: int[]) (inputArrayLength: int) (vertices: int[]) (verticesLength: int) (totalSum: int[]) : OpenCLEvaluation<unit> = opencl {
11+
let workGroupSize = Utils.workGroupSize
12+
13+
let scan =
14+
<@
15+
fun (ndRange: _1D)
16+
(resultBuffer: int[])
17+
(verticesBuffer: int[])
18+
(totalSumBuffer: int[]) ->
19+
20+
let resultLocalBuffer = localArray<int> workGroupSize
21+
let i = ndRange.GlobalID0
22+
let localID = ndRange.LocalID0
23+
// Load this work item's element into the local buffer; positions past the valid length are padded with 0.
24+
if i < inputArrayLength then resultLocalBuffer.[localID] <- resultBuffer.[i] else resultLocalBuffer.[localID] <- 0
25+
// First phase: with step = 2, 4, ..., workGroupSize, add the element half a step back into
// the element at the end of each step-sized segment.
26+
let mutable step = 2
27+
while step <= workGroupSize do
28+
barrier ()
29+
if localID < workGroupSize / step then
30+
let i = step * (localID + 1) - 1
31+
resultLocalBuffer.[i] <- resultLocalBuffer.[i] + resultLocalBuffer.[i - (step >>> 1)]
32+
step <- step <<< 1
33+
barrier ()
34+
// The last work item of a group reads the accumulated value in the last local slot, stores it
// as the group's entry in verticesBuffer, and resets that slot to 0 before the second phase.
// `localID = i` compares the local id with the global id, so totalSumBuffer.[0] is written
// only where the two ids coincide.
35+
if localID = workGroupSize - 1 then
36+
if verticesLength <= 1 && localID = i then totalSumBuffer.[0] <- resultLocalBuffer.[localID]
37+
verticesBuffer.[i / workGroupSize] <- resultLocalBuffer.[localID]
38+
resultLocalBuffer.[localID] <- 0
39+
40+
step <- workGroupSize
// Second phase: with step = workGroupSize, ..., 2, each active work item adds the value half a
// step back (j) into the segment end (i) and moves the old segment-end value to j.
41+
while step > 1 do
42+
barrier ()
43+
if localID < workGroupSize / step then
44+
let i = step * (localID + 1) - 1
45+
let j = i - (step >>> 1)
46+
47+
let tmp = resultLocalBuffer.[i]
48+
resultLocalBuffer.[i] <- resultLocalBuffer.[i] + resultLocalBuffer.[j]
49+
resultLocalBuffer.[j] <- tmp
50+
step <- step >>> 1
51+
barrier ()
52+
// Write the scanned value back to the global buffer for valid positions only.
53+
if i < inputArrayLength then resultBuffer.[i] <- resultLocalBuffer.[localID]
54+
@>
55+
56+
do! RunCommand scan <| fun kernelPrepare ->
57+
let ndRange = _1D(Utils.workSize inputArrayLength, workGroupSize)
58+
kernelPrepare
59+
ndRange
60+
inputArray
61+
vertices
62+
totalSum
63+
}
64+
65+
// Runs one kernel that adds vertices.[i / bunchLength] to inputArray.[i] for every index i
// from bunchLength up to inputArrayLength - 1. Indices below bunchLength are never touched,
// because each work item's index is its global id shifted by bunchLength.
//   inputArray / inputArrayLength - buffer updated in place and its number of valid elements.
//   vertices                      - per-bunch offsets, indexed by i / bunchLength.
//   bunchLength                   - number of consecutive elements sharing one offset.
let update (inputArray: int[]) (inputArrayLength: int) (vertices: int[]) (bunchLength: int) : OpenCLEvaluation<unit> = opencl {
66+
let workGroupSize = Utils.workGroupSize
67+
68+
let update =
69+
<@
70+
fun (ndRange: _1D)
71+
(resultBuffer: int[])
72+
(verticesBuffer: int[]) ->
73+
74+
let i = ndRange.GlobalID0 + bunchLength
75+
if i < inputArrayLength then
76+
resultBuffer.[i] <- resultBuffer.[i] + verticesBuffer.[i / bunchLength]
77+
@>
78+
// NOTE(review): function application binds tighter than `-`, so the range size below is
// (Utils.workSize inputArrayLength) - bunchLength, not Utils.workSize (inputArrayLength - bunchLength).
// Confirm this is intended and that the result stays positive for every caller.
79+
do! RunCommand update <| fun kernelPrepare ->
80+
let ndRange = _1D(Utils.workSize inputArrayLength - bunchLength, workGroupSize)
81+
kernelPrepare
82+
ndRange
83+
inputArray
84+
vertices
85+
}
86+
87+
// Changes received arrays
88+
// Drives the multi-level prefix sum: scans inputArray per work group, then repeatedly scans the
// array of group totals and adds the scanned totals back into inputArray with `update`, until a
// single group total remains. The workflow returns no value; results are left in inputArray, and
// `scan` stores a total in totalSum.[0] on the pass whose verticesLength argument is <= 1.
// NOTE(review): there is no guard for an empty inputArray here; behaviour for length 0 depends on
// Utils.workSize and RunCommand — confirm before calling with empty input.
let run (inputArray: int[]) (totalSum: int[]) = opencl {
89+
let workGroupSize = Utils.workGroupSize
90+
// Two scratch arrays for group totals; each is sized to one entry per work group of the level below it.
91+
let firstVertices = Array.zeroCreate <| (inputArray.Length - 1) / workGroupSize + 1
92+
let secondVertices = Array.zeroCreate <| (firstVertices.Length - 1) / workGroupSize + 1
93+
let mutable verticesArrays = firstVertices, secondVertices
94+
let swap (a, b) = (b, a)
95+
96+
let mutable verticesLength = (inputArray.Length - 1) / workGroupSize + 1
97+
let mutable bunchLength = workGroupSize
98+
99+
do! scan inputArray inputArray.Length (fst verticesArrays) verticesLength totalSum
// Each pass scans the current totals (writing the next level's totals into the other scratch
// array), applies the scanned totals to inputArray, then swaps the roles of the two scratch
// arrays and widens bunchLength by a factor of workGroupSize.
100+
while verticesLength > 1 do
101+
let fstVertices = fst verticesArrays
102+
let sndVertices = snd verticesArrays
103+
do! scan fstVertices verticesLength sndVertices ((verticesLength - 1) / workGroupSize + 1) totalSum
104+
do! update inputArray inputArray.Length fstVertices bunchLength
105+
106+
bunchLength <- bunchLength * workGroupSize
107+
verticesArrays <- swap verticesArrays
108+
verticesLength <- (verticesLength - 1) / workGroupSize + 1
109+
}
110+
111+
// let rec v1 (inputArray: int[]) =
112+
// let outputArray = Array.zeroCreate inputArray.Length
113+
114+
// if inputArray.Length = 1 then
115+
// let fillOutputArray =
116+
// <@
117+
// fun (ndRange: _1D)
118+
// (inputArrayBuffer: int[])
119+
// (outputArrayBuffer: int[]) ->
120+
121+
// let i = ndRange.GlobalID0
122+
// outputArrayBuffer.[i] <- inputArrayBuffer.[i]
123+
// @>
124+
125+
// opencl {
126+
// let binder kernelP =
127+
// let ndRange = _1D(outputArray.Length)
128+
// kernelP
129+
// ndRange
130+
// inputArray
131+
// outputArray
132+
// do! RunCommand fillOutputArray binder
133+
// return outputArray
134+
// }
135+
// else
136+
// let intermediateArray = Array.zeroCreate ((inputArray.Length + 1) / 2)
137+
// let inputArrayLength = inputArray.Length
138+
// let intermediateArrayLength = intermediateArray.Length
139+
140+
// let fillIntermediateArray =
141+
// <@
142+
// fun (ndRange: _1D)
143+
// (inputArrayBuffer: int[])
144+
// (intermediateArrayBuffer: int[]) ->
145+
146+
// let i = ndRange.GlobalID0
147+
// if i < intermediateArrayLength then
148+
// if 2 * i + 1 < inputArrayLength then
149+
// intermediateArrayBuffer.[i] <- inputArrayBuffer.[2 * i] + inputArrayBuffer.[2 * i + 1]
150+
// else intermediateArrayBuffer.[i] <- inputArrayBuffer.[2 * i]
151+
// @>
152+
153+
// let fillIntermediateArray =
154+
// opencl {
155+
// let binder kernelP =
156+
// let ndRange = _1D(workSize intermediateArray.Length, workGroupSize)
157+
// kernelP
158+
// ndRange
159+
// inputArray
160+
// intermediateArray
161+
// do! RunCommand fillIntermediateArray binder
162+
// }
163+
164+
// let fillOutputArray =
165+
// <@
166+
// fun (ndRange: _1D)
167+
// (auxiliaryPrefixSumArrayBuffer: int[])
168+
// (inputArrayBuffer: int[])
169+
// (outputArrayBuffer: int[]) ->
170+
171+
// let i = ndRange.GlobalID0
172+
// if i < inputArrayLength then
173+
// let j = (i - 1) / 2
174+
// if i % 2 = 0 then
175+
// if i = 0 then outputArrayBuffer.[i] <- inputArrayBuffer.[i]
176+
// else outputArrayBuffer.[i] <- auxiliaryPrefixSumArrayBuffer.[j] + inputArrayBuffer.[i]
177+
// else outputArrayBuffer.[i] <- auxiliaryPrefixSumArrayBuffer.[j]
178+
// @>
179+
180+
// opencl {
181+
// do! fillIntermediateArray
182+
// let! auxiliaryPrefixSumArray = v1 intermediateArray
183+
184+
// let binder kernelP =
185+
// let ndRange = _1D(workSize inputArray.Length, workGroupSize)
186+
// kernelP
187+
// ndRange
188+
// auxiliaryPrefixSumArray
189+
// inputArray
190+
// outputArray
191+
// do! RunCommand fillOutputArray binder
192+
193+
// return outputArray
194+
// }
195+
196+
// let v2 (inputArray: int[]) =
197+
// let firstIntermediateArray = Array.copy inputArray
198+
// let secondIntermediateArray = Array.copy inputArray
199+
// let outputArrayLength = firstIntermediateArray.Length
200+
201+
// let updateResult =
202+
// <@
203+
// fun (ndRange: _1D)
204+
// (offset: int)
205+
// (firstIntermediateArrayBuffer: int[])
206+
// (secondIntermediateArrayBuffer: int[]) ->
207+
208+
// let i = ndRange.GlobalID0
209+
// if i < outputArrayLength then
210+
// if i < offset then firstIntermediateArrayBuffer.[i] <- secondIntermediateArrayBuffer.[i]
211+
// else firstIntermediateArrayBuffer.[i] <- secondIntermediateArrayBuffer.[i] + secondIntermediateArrayBuffer.[i - offset]
212+
// @>
213+
214+
// let binder offset firstIntermediateArray secondIntermediateArray kernelP =
215+
// let ndRange = _1D(workSize outputArrayLength, workGroupSize)
216+
// kernelP
217+
// ndRange
218+
// offset
219+
// firstIntermediateArray
220+
// secondIntermediateArray
221+
222+
// let swap (a, b) = (b, a)
223+
// let mutable arrays = firstIntermediateArray, secondIntermediateArray
224+
225+
// opencl {
226+
// let mutable offset = 1
227+
// while offset < outputArrayLength do
228+
// arrays <- swap arrays
229+
// do! RunCommand updateResult <| (binder offset <|| arrays)
230+
// offset <- offset * 2
231+
232+
// return (fst arrays)
233+
// }

0 commit comments

Comments
 (0)