|
| 1 | +namespace GraphBLAS.FSharp.Backend.Common |
| 2 | + |
| 3 | +open Brahma.OpenCL |
| 4 | +open Brahma.FSharp.OpenCL.WorkflowBuilder.Basic |
| 5 | +open Brahma.FSharp.OpenCL.WorkflowBuilder.Evaluation |
| 6 | + |
| 7 | +// Functions in a module can be given short names such as run\get\if\it\t, |
| 8 | +// as suggested here: https://www.reddit.com/r/fsharp/comments/5kvsyk/modules_or_namespaces/dbt0zf7?utm_source=share&utm_medium=web2x&context=3 |
| 9 | +module internal PrefixSum = |
| | + // Runs one scan pass over `inputArray` on the device, modifying it in place. |
| | + // Each work group loads up to `workGroupSize` elements into a local buffer (indices at or past |
| | + // `inputArrayLength` are loaded as 0), runs a reduction sweep followed by a distribution sweep, and |
| | + // writes the buffer back, so every element ends up holding the sum of the elements before it |
| | + // within its own work group. The sum of a whole work group is stored in `vertices` at the group's index. |
| | + // `totalSum.[0]` is written only when `verticesLength <= 1`, and only by the work item whose local |
| | + // and global ids coincide, i.e. the last item of the first work group. |
| | + // NOTE(review): the sweeps double/halve `step`, so this presumably expects `Utils.workGroupSize` |
| | + // to be a power of two - confirm against Utils. |
| | + // NOTE(review): indentation is not preserved in this view; the nesting described in the comments |
| | + // below is inferred from the statement order. |
| 10 | + let scan (inputArray: int[]) (inputArrayLength: int) (vertices: int[]) (verticesLength: int) (totalSum: int[]) : OpenCLEvaluation<unit> = opencl { |
| 11 | + let workGroupSize = Utils.workGroupSize |
| 12 | + |
| 13 | + let scan = |
| 14 | + <@ |
| 15 | + fun (ndRange: _1D) |
| 16 | + (resultBuffer: int[]) |
| 17 | + (verticesBuffer: int[]) |
| 18 | + (totalSumBuffer: int[]) -> |
| 19 | + |
| 20 | + let resultLocalBuffer = localArray<int> workGroupSize |
| 21 | + let i = ndRange.GlobalID0 |
| 22 | + let localID = ndRange.LocalID0 |
| 23 | + |
| | + // Stage this work item's element in the local buffer; slots past the end of the input hold 0. |
| 24 | + if i < inputArrayLength then resultLocalBuffer.[localID] <- resultBuffer.[i] else resultLocalBuffer.[localID] <- 0 |
| 25 | + |
| | + // Reduction sweep: with step = 2, 4, ... each active item adds the slot half a step to the left |
| | + // into slot step * (localID + 1) - 1. The inner `i` shadows the global id only inside the branch. |
| 26 | + let mutable step = 2 |
| 27 | + while step <= workGroupSize do |
| 28 | + barrier () |
| 29 | + if localID < workGroupSize / step then |
| 30 | + let i = step * (localID + 1) - 1 |
| 31 | + resultLocalBuffer.[i] <- resultLocalBuffer.[i] + resultLocalBuffer.[i - (step >>> 1)] |
| 32 | + step <- step <<< 1 |
| 33 | + barrier () |
| 34 | + |
| | + // The last slot now holds the group's total: publish it, then zero the slot before the second sweep. |
| 35 | + if localID = workGroupSize - 1 then |
| 36 | + if verticesLength <= 1 && localID = i then totalSumBuffer.[0] <- resultLocalBuffer.[localID] |
| 37 | + verticesBuffer.[i / workGroupSize] <- resultLocalBuffer.[localID] |
| 38 | + resultLocalBuffer.[localID] <- 0 |
| 39 | + |
| | + // Distribution sweep: with step = workGroupSize, ..., 2 each active item hands its value to the |
| | + // slot half a step to the left (j) and keeps the sum of both in slot i. |
| 40 | + step <- workGroupSize |
| 41 | + while step > 1 do |
| 42 | + barrier () |
| 43 | + if localID < workGroupSize / step then |
| 44 | + let i = step * (localID + 1) - 1 |
| 45 | + let j = i - (step >>> 1) |
| 46 | + |
| 47 | + let tmp = resultLocalBuffer.[i] |
| 48 | + resultLocalBuffer.[i] <- resultLocalBuffer.[i] + resultLocalBuffer.[j] |
| 49 | + resultLocalBuffer.[j] <- tmp |
| 50 | + step <- step >>> 1 |
| 51 | + barrier () |
| 52 | + |
| | + // Copy the scanned value back; items past the end of the input write nothing. |
| 53 | + if i < inputArrayLength then resultBuffer.[i] <- resultLocalBuffer.[localID] |
| 54 | + @> |
| 55 | + |
| | + // Range size comes from Utils.workSize (defined elsewhere); the kernel's own bounds checks |
| | + // handle any work items beyond inputArrayLength. |
| 56 | + do! RunCommand scan <| fun kernelPrepare -> |
| 57 | + let ndRange = _1D(Utils.workSize inputArrayLength, workGroupSize) |
| 58 | + kernelPrepare |
| 59 | + ndRange |
| 60 | + inputArray |
| 61 | + vertices |
| 62 | + totalSum |
| 63 | + } |
| 64 | + |
| | + // Adds per-bunch offsets to `inputArray` in place: every index i with |
| | + // bunchLength <= i < inputArrayLength gets `vertices.[i / bunchLength]` added to it. |
| | + // The first `bunchLength` elements are never touched, because work item ids are shifted by `bunchLength`. |
| 65 | + let update (inputArray: int[]) (inputArrayLength: int) (vertices: int[]) (bunchLength: int) : OpenCLEvaluation<unit> = opencl { |
| 66 | + let workGroupSize = Utils.workGroupSize |
| 67 | + |
| 68 | + let update = |
| 69 | + <@ |
| 70 | + fun (ndRange: _1D) |
| 71 | + (resultBuffer: int[]) |
| 72 | + (verticesBuffer: int[]) -> |
| 73 | + |
| | + // Skip the first bunch: work item 0 handles element `bunchLength`. |
| 74 | + let i = ndRange.GlobalID0 + bunchLength |
| 75 | + if i < inputArrayLength then |
| 76 | + resultBuffer.[i] <- resultBuffer.[i] + verticesBuffer.[i / bunchLength] |
| 77 | + @> |
| 78 | + |
| | + // Function application binds tighter than `-`, so the range size is |
| | + // (Utils.workSize inputArrayLength) - bunchLength. |
| | + // NOTE(review): this assumes the result is positive, i.e. the caller only invokes `update` |
| | + // when the input is longer than one bunch - confirm at call sites. |
| 79 | + do! RunCommand update <| fun kernelPrepare -> |
| 80 | + let ndRange = _1D(Utils.workSize inputArrayLength - bunchLength, workGroupSize) |
| 81 | + kernelPrepare |
| 82 | + ndRange |
| 83 | + inputArray |
| 84 | + vertices |
| 85 | + } |
| 86 | + |
| 87 | + // Computes prefix sums of `inputArray` in place by combining `scan` and `update` passes. |
| | + // Both received arrays are modified: `inputArray` is overwritten with the result, and `scan` |
| | + // writes `totalSum.[0]` on the pass whose output fits in a single vertex. |
| | + // NOTE(review): indentation is not preserved in this view; the loop body is taken to run up to |
| | + // the `verticesLength` update. |
| 88 | + let run (inputArray: int[]) (totalSum: int[]) = opencl { |
| 89 | + let workGroupSize = Utils.workGroupSize |
| 90 | + |
| | + // Two scratch arrays for work-group totals; (n - 1) / workGroupSize + 1 is the number of |
| | + // work groups needed to cover n elements. |
| 91 | + let firstVertices = Array.zeroCreate <| (inputArray.Length - 1) / workGroupSize + 1 |
| 92 | + let secondVertices = Array.zeroCreate <| (firstVertices.Length - 1) / workGroupSize + 1 |
| 93 | + let mutable verticesArrays = firstVertices, secondVertices |
| 94 | + let swap (a, b) = (b, a) |
| 95 | + |
| | + // verticesLength: number of totals produced by the previous scan pass. |
| | + // bunchLength: number of `inputArray` elements covered by one entry of the current totals array |
| | + // (it is the divisor `update` uses to pick an entry). |
| 96 | + let mutable verticesLength = (inputArray.Length - 1) / workGroupSize + 1 |
| 97 | + let mutable bunchLength = workGroupSize |
| 98 | + |
| | + // First pass: scan the input within each work group and collect the group totals. |
| 99 | + do! scan inputArray inputArray.Length (fst verticesArrays) verticesLength totalSum |
| | + // While more than one total remains: scan the totals themselves (their own group totals go to |
| | + // the other scratch array), then add the scanned totals to the input as offsets. |
| 100 | + while verticesLength > 1 do |
| 101 | + let fstVertices = fst verticesArrays |
| 102 | + let sndVertices = snd verticesArrays |
| 103 | + do! scan fstVertices verticesLength sndVertices ((verticesLength - 1) / workGroupSize + 1) totalSum |
| 104 | + do! update inputArray inputArray.Length fstVertices bunchLength |
| 105 | + |
| | + // Move one level up: each total now covers workGroupSize times more input elements, and the |
| | + // array that just received totals becomes the next pass's input. |
| 106 | + bunchLength <- bunchLength * workGroupSize |
| 107 | + verticesArrays <- swap verticesArrays |
| 108 | + verticesLength <- (verticesLength - 1) / workGroupSize + 1 |
| 109 | + } |
| 110 | + |
| 111 | + // let rec v1 (inputArray: int[]) = |
| 112 | + // let outputArray = Array.zeroCreate inputArray.Length |
| 113 | + |
| 114 | + // if inputArray.Length = 1 then |
| 115 | + // let fillOutputArray = |
| 116 | + // <@ |
| 117 | + // fun (ndRange: _1D) |
| 118 | + // (inputArrayBuffer: int[]) |
| 119 | + // (outputArrayBuffer: int[]) -> |
| 120 | + |
| 121 | + // let i = ndRange.GlobalID0 |
| 122 | + // outputArrayBuffer.[i] <- inputArrayBuffer.[i] |
| 123 | + // @> |
| 124 | + |
| 125 | + // opencl { |
| 126 | + // let binder kernelP = |
| 127 | + // let ndRange = _1D(outputArray.Length) |
| 128 | + // kernelP |
| 129 | + // ndRange |
| 130 | + // inputArray |
| 131 | + // outputArray |
| 132 | + // do! RunCommand fillOutputArray binder |
| 133 | + // return outputArray |
| 134 | + // } |
| 135 | + // else |
| 136 | + // let intermediateArray = Array.zeroCreate ((inputArray.Length + 1) / 2) |
| 137 | + // let inputArrayLength = inputArray.Length |
| 138 | + // let intermediateArrayLength = intermediateArray.Length |
| 139 | + |
| 140 | + // let fillIntermediateArray = |
| 141 | + // <@ |
| 142 | + // fun (ndRange: _1D) |
| 143 | + // (inputArrayBuffer: int[]) |
| 144 | + // (intermediateArrayBuffer: int[]) -> |
| 145 | + |
| 146 | + // let i = ndRange.GlobalID0 |
| 147 | + // if i < intermediateArrayLength then |
| 148 | + // if 2 * i + 1 < inputArrayLength then |
| 149 | + // intermediateArrayBuffer.[i] <- inputArrayBuffer.[2 * i] + inputArrayBuffer.[2 * i + 1] |
| 150 | + // else intermediateArrayBuffer.[i] <- inputArrayBuffer.[2 * i] |
| 151 | + // @> |
| 152 | + |
| 153 | + // let fillIntermediateArray = |
| 154 | + // opencl { |
| 155 | + // let binder kernelP = |
| 156 | + // let ndRange = _1D(workSize intermediateArray.Length, workGroupSize) |
| 157 | + // kernelP |
| 158 | + // ndRange |
| 159 | + // inputArray |
| 160 | + // intermediateArray |
| 161 | + // do! RunCommand fillIntermediateArray binder |
| 162 | + // } |
| 163 | + |
| 164 | + // let fillOutputArray = |
| 165 | + // <@ |
| 166 | + // fun (ndRange: _1D) |
| 167 | + // (auxiliaryPrefixSumArrayBuffer: int[]) |
| 168 | + // (inputArrayBuffer: int[]) |
| 169 | + // (outputArrayBuffer: int[]) -> |
| 170 | + |
| 171 | + // let i = ndRange.GlobalID0 |
| 172 | + // if i < inputArrayLength then |
| 173 | + // let j = (i - 1) / 2 |
| 174 | + // if i % 2 = 0 then |
| 175 | + // if i = 0 then outputArrayBuffer.[i] <- inputArrayBuffer.[i] |
| 176 | + // else outputArrayBuffer.[i] <- auxiliaryPrefixSumArrayBuffer.[j] + inputArrayBuffer.[i] |
| 177 | + // else outputArrayBuffer.[i] <- auxiliaryPrefixSumArrayBuffer.[j] |
| 178 | + // @> |
| 179 | + |
| 180 | + // opencl { |
| 181 | + // do! fillIntermediateArray |
| 182 | + // let! auxiliaryPrefixSumArray = v1 intermediateArray |
| 183 | + |
| 184 | + // let binder kernelP = |
| 185 | + // let ndRange = _1D(workSize inputArray.Length, workGroupSize) |
| 186 | + // kernelP |
| 187 | + // ndRange |
| 188 | + // auxiliaryPrefixSumArray |
| 189 | + // inputArray |
| 190 | + // outputArray |
| 191 | + // do! RunCommand fillOutputArray binder |
| 192 | + |
| 193 | + // return outputArray |
| 194 | + // } |
| 195 | + |
| 196 | + // let v2 (inputArray: int[]) = |
| 197 | + // let firstIntermediateArray = Array.copy inputArray |
| 198 | + // let secondIntermediateArray = Array.copy inputArray |
| 199 | + // let outputArrayLength = firstIntermediateArray.Length |
| 200 | + |
| 201 | + // let updateResult = |
| 202 | + // <@ |
| 203 | + // fun (ndRange: _1D) |
| 204 | + // (offset: int) |
| 205 | + // (firstIntermediateArrayBuffer: int[]) |
| 206 | + // (secondIntermediateArrayBuffer: int[]) -> |
| 207 | + |
| 208 | + // let i = ndRange.GlobalID0 |
| 209 | + // if i < outputArrayLength then |
| 210 | + // if i < offset then firstIntermediateArrayBuffer.[i] <- secondIntermediateArrayBuffer.[i] |
| 211 | + // else firstIntermediateArrayBuffer.[i] <- secondIntermediateArrayBuffer.[i] + secondIntermediateArrayBuffer.[i - offset] |
| 212 | + // @> |
| 213 | + |
| 214 | + // let binder offset firstIntermediateArray secondIntermediateArray kernelP = |
| 215 | + // let ndRange = _1D(workSize outputArrayLength, workGroupSize) |
| 216 | + // kernelP |
| 217 | + // ndRange |
| 218 | + // offset |
| 219 | + // firstIntermediateArray |
| 220 | + // secondIntermediateArray |
| 221 | + |
| 222 | + // let swap (a, b) = (b, a) |
| 223 | + // let mutable arrays = firstIntermediateArray, secondIntermediateArray |
| 224 | + |
| 225 | + // opencl { |
| 226 | + // let mutable offset = 1 |
| 227 | + // while offset < outputArrayLength do |
| 228 | + // arrays <- swap arrays |
| 229 | + // do! RunCommand updateResult <| (binder offset <|| arrays) |
| 230 | + // offset <- offset * 2 |
| 231 | + |
| 232 | + // return (fst arrays) |
| 233 | + // } |
0 commit comments