|
1 | 1 | namespace GraphBLAS.FSharp.Backend.Common |
2 | 2 |
|
3 | 3 | open Brahma.FSharp |
| 4 | +open GraphBLAS.FSharp.Backend |
4 | 5 | open Microsoft.FSharp.Quotations |
5 | 6 |
|
6 | | -module internal rec Sum = |
7 | | - let run (inputArray: 'a []) (plus: Expr<'a -> 'a -> 'a>) (zero: 'a) = |
8 | | - if inputArray.Length = 0 then |
9 | | - opencl { |
10 | | - let result = [| zero |] |
| 7 | +module internal Sum = |
11 | 8 |
|
12 | | - let bruh = |
13 | | - <@ fun (range: Range1D) (array: 'a []) -> |
14 | | - let mutable a = 0 |
15 | | - a <- 0 @> |
| 9 | + let private scan |
| 10 | + (clContext: ClContext) |
| 11 | + (workGroupSize: int) |
| 12 | + (opAdd: Expr<'a -> 'a -> 'a>) |
| 13 | + zero |
| 14 | + = |
16 | 15 |
|
17 | | - do! |
18 | | - runCommand bruh |
19 | | - <| fun kernelPrepare -> kernelPrepare <| Range1D(64, 64) <| result |
| 16 | + let subSum = SubSum.sequentialSum opAdd |
20 | 17 |
|
21 | | - return result |
22 | | - } |
23 | | - else |
24 | | - runNotEmpty inputArray plus zero |
| 18 | + let scan = |
| 19 | + <@ |
| 20 | + fun (ndRange: Range1D) length (inputArray: ClArray<'a>) (resultArray: ClArray<'a>) -> |
25 | 21 |
|
26 | | - let private runNotEmpty (inputArray: 'a []) (plus: Expr<'a -> 'a -> 'a>) (zero: 'a) = |
27 | | - opencl { |
28 | | - let workGroupSize = 256 |
| 22 | + let gid = ndRange.GlobalID0 |
| 23 | + let lid = ndRange.LocalID0 |
29 | 24 |
|
30 | | - let firstVertices = |
31 | | - Array.zeroCreate |
32 | | - <| (inputArray.Length - 1) / workGroupSize + 1 |
| 25 | + let localValues = localArray<'a> workGroupSize |
33 | 26 |
|
34 | | - let secondVertices = |
35 | | - Array.zeroCreate |
36 | | - <| (firstVertices.Length - 1) / workGroupSize + 1 |
| 27 | + if gid < length then |
| 28 | + localValues[lid] <- inputArray[gid] |
| 29 | + else |
| 30 | + localValues[lid] <- zero |
37 | 31 |
|
38 | | - let mutable verticesArrays = firstVertices, secondVertices |
39 | | - let swap (a, b) = (b, a) |
| 32 | + barrierLocal () |
40 | 33 |
|
41 | | - let mutable verticesLength = firstVertices.Length |
| 34 | + (%subSum) workGroupSize lid localValues |
42 | 35 |
|
43 | | - do! scan inputArray inputArray.Length (fst verticesArrays) plus zero |
| 36 | + resultArray[gid / workGroupSize] <- localValues[0] |
| 37 | + @> |
44 | 38 |
|
45 | | - while verticesLength > workGroupSize do |
46 | | - let fstVertices = fst verticesArrays |
47 | | - let sndVertices = snd verticesArrays |
48 | | - do! scan fstVertices verticesLength sndVertices plus zero |
| 39 | + let kernel = clContext.Compile(scan) |
49 | 40 |
|
50 | | - verticesArrays <- swap verticesArrays |
51 | | - verticesLength <- (verticesLength - 1) / workGroupSize + 1 |
| 41 | + fun (processor: MailboxProcessor<_>) (valuesArray: ClArray<'a>) valuesLength (resultArray: ClArray<'a>) -> |
| 42 | + let ndRange = Range1D.CreateValid(valuesArray.Length, workGroupSize) |
52 | 43 |
|
53 | | - let result = Array.create 1 zero |
| 44 | + let kernel = kernel.GetKernel() |
54 | 45 |
|
55 | | - let fstVertices = fst verticesArrays |
56 | | - do! scan fstVertices verticesLength result plus zero |
| 46 | + processor.Post( |
| 47 | + Msg.MsgSetArguments( |
| 48 | + fun () -> |
| 49 | + kernel.KernelFunc |
| 50 | + ndRange |
| 51 | + valuesLength |
| 52 | + valuesArray |
| 53 | + resultArray) |
| 54 | + ) |
57 | 55 |
|
58 | | - return result |
59 | | - } |
| 56 | + processor.Post(Msg.CreateRunMsg<_, _>(kernel)) |
60 | 57 |
|
61 | | - let private scan |
62 | | - (inputArray: 'a []) |
63 | | - (inputArrayLength: int) |
64 | | - (vertices: 'a []) |
65 | | - (plus: Expr<'a -> 'a -> 'a>) |
66 | | - (zero: 'a) |
| 58 | + () |
| 59 | + |
| 60 | + let private scanToCell |
| 61 | + (clContext: ClContext) |
| 62 | + (workGroupSize: int) |
| 63 | + (opAdd: Expr<'a -> 'a -> 'a>) |
| 64 | + zero |
67 | 65 | = |
68 | | - opencl { |
69 | | - let workGroupSize = 256 |
70 | 66 |
|
71 | | - let scan = |
72 | | - <@ fun (ndRange: Range1D) (resultBuffer: 'a []) (verticesBuffer: 'a []) -> |
| 67 | + let subSum = SubSum.sequentialSum opAdd |
| 68 | + |
| 69 | + let scan = |
| 70 | + <@ |
| 71 | + fun (ndRange: Range1D) length (inputArray: ClArray<'a>) (resultCell: ClCell<'a>) -> |
73 | 72 |
|
74 | | - let i = ndRange.GlobalID0 |
75 | | - let localID = ndRange.LocalID0 |
| 73 | + let gid = ndRange.GlobalID0 |
| 74 | + let lid = ndRange.LocalID0 |
76 | 75 |
|
77 | | - let resultLocalBuffer = localArray<'a> workGroupSize |
| 76 | + let localValues = localArray<'a> workGroupSize |
78 | 77 |
|
79 | | - if i < inputArrayLength then |
80 | | - resultLocalBuffer.[localID] <- resultBuffer.[i] |
| 78 | + if gid < length then |
| 79 | + localValues[lid] <- inputArray[gid] |
81 | 80 | else |
82 | | - resultLocalBuffer.[localID] <- zero |
| 81 | + localValues[lid] <- zero |
83 | 82 |
|
84 | | - let mutable step = 2 |
| 83 | + barrierLocal () |
85 | 84 |
|
86 | | - while step <= workGroupSize do |
87 | | - barrierLocal () |
| 85 | + (%subSum) workGroupSize lid localValues |
88 | 86 |
|
89 | | - if localID < workGroupSize / step then |
90 | | - let i = step * (localID + 1) - 1 |
91 | | - resultLocalBuffer.[i] <- (%plus) resultLocalBuffer.[i] resultLocalBuffer.[i - (step >>> 1)] |
| 87 | + resultCell.Value <- localValues[0] |
| 88 | + @> |
92 | 89 |
|
93 | | - step <- step <<< 1 |
| 90 | + let kernel = clContext.Compile(scan) |
94 | 91 |
|
95 | | - barrierLocal () |
| 92 | + fun (processor: MailboxProcessor<_>) (valuesArray: ClArray<'a>) valuesLength -> |
| 93 | + |
| 94 | + let ndRange = Range1D.CreateValid(valuesArray.Length, workGroupSize) |
| 95 | + |
| 96 | + let resultCell = clContext.CreateClCell zero |
| 97 | + |
| 98 | + let kernel = kernel.GetKernel() |
| 99 | + |
| 100 | + processor.Post( |
| 101 | + Msg.MsgSetArguments( |
| 102 | + fun () -> |
| 103 | + kernel.KernelFunc |
| 104 | + ndRange |
| 105 | + valuesLength |
| 106 | + valuesArray |
| 107 | + resultCell) |
| 108 | + ) |
| 109 | + |
| 110 | + processor.Post(Msg.CreateRunMsg<_, _>(kernel)) |
| 111 | + |
| 112 | + resultCell |
96 | 113 |
|
97 | | - if localID = workGroupSize - 1 then |
98 | | - verticesBuffer.[i / workGroupSize] <- resultLocalBuffer.[localID] @> |
| 114 | + let run |
| 115 | + (clContext: ClContext) |
| 116 | + (workGroupSize: int) |
| 117 | + (opAdd: Expr<'a -> 'a -> 'a>) |
| 118 | + (zero: 'a) |
| 119 | + = |
| 120 | + |
| 121 | + let scan = scan clContext workGroupSize opAdd zero |
| 122 | + let scanToCell = scanToCell clContext workGroupSize opAdd zero |
| 123 | + |
| 124 | + fun (processor: MailboxProcessor<_>) (inputArray: ClArray<'a>) -> |
| 125 | + |
| 126 | + let scan = scan processor |
| 127 | + |
| 128 | + let firstLength = (inputArray.Length - 1) / workGroupSize + 1 |
| 129 | + |
| 130 | + let firstVerticesArray = |
| 131 | + clContext.CreateClArray( |
| 132 | + firstLength, |
| 133 | + hostAccessMode = HostAccessMode.NotAccessible, |
| 134 | + deviceAccessMode = DeviceAccessMode.ReadWrite, |
| 135 | + allocationMode = AllocationMode.Default |
| 136 | + ) |
| 137 | + |
| 138 | + let secondLength = (firstLength - 1) / workGroupSize + 1 |
| 139 | + |
| 140 | + let secondVerticesArray = |
| 141 | + clContext.CreateClArray( |
| 142 | + secondLength, |
| 143 | + hostAccessMode = HostAccessMode.NotAccessible, |
| 144 | + deviceAccessMode = DeviceAccessMode.ReadWrite, |
| 145 | + allocationMode = AllocationMode.Default |
| 146 | + ) |
| 147 | + |
| 148 | + let mutable verticesArrays = firstVerticesArray, secondVerticesArray |
| 149 | + let swap (a, b) = (b, a) |
| 150 | + |
| 151 | + scan inputArray inputArray.Length (fst verticesArrays) |
| 152 | + |
| 153 | + let mutable verticesLength = firstLength |
| 154 | + |
| 155 | + while verticesLength > workGroupSize do |
| 156 | + let fstVertices = fst verticesArrays |
| 157 | + let sndVertices = snd verticesArrays |
| 158 | + |
| 159 | + scan fstVertices verticesLength sndVertices |
| 160 | + |
| 161 | + verticesArrays <- swap verticesArrays |
| 162 | + verticesLength <- (verticesLength - 1) / workGroupSize + 1 |
| 163 | + |
| 164 | + let fstVertices = fst verticesArrays |
| 165 | + let result = scanToCell processor fstVertices verticesLength |
99 | 166 |
|
100 | | - do! |
101 | | - runCommand scan |
102 | | - <| fun kernelPrepare -> |
103 | | - let ndRange = |
104 | | - Range1D.CreateValid(inputArrayLength, workGroupSize) |
| 167 | + processor.Post(Msg.CreateFreeMsg(firstVerticesArray)) |
| 168 | + processor.Post(Msg.CreateFreeMsg(secondVerticesArray)) |
105 | 169 |
|
106 | | - kernelPrepare ndRange inputArray vertices |
107 | | - } |
| 170 | + result |
0 commit comments