@@ -127,231 +127,83 @@ module ClArray =
127127
128128 outputArray
129129
130- let private update ( clContext : ClContext ) =
131-
132- let update =
133- <@ fun ( ndRange : Range1D ) inputArrayLength bunchLength ( resultBuffer : ClArray < int >) ( verticesBuffer : ClArray < int >) ->
134-
135- let i = ndRange.GlobalID0 + bunchLength
136-
137- if i < inputArrayLength then
138- resultBuffer.[ i] <-
139- resultBuffer.[ i]
140- + verticesBuffer.[ i / bunchLength] @>
141-
142- let kernel = clContext.Compile( update)
143-
144- fun ( processor : MailboxProcessor < _ >) workGroupSize ( inputArray : ClArray < int >) ( inputArrayLength : int ) ( vertices : ClArray < int >) ( bunchLength : int ) ->
145- let ndRange =
146- Range1D.CreateValid( inputArrayLength - bunchLength, workGroupSize)
147-
148- let kernel = kernel.GetKernel()
149-
150- processor.Post(
151- Msg.MsgSetArguments
152- ( fun () -> kernel.KernelFunc ndRange inputArrayLength bunchLength inputArray vertices)
153- )
154-
155- processor.Post( Msg.CreateRunMsg<_, _> kernel)
156-
157- let private scan ( clContext : ClContext ) workGroupSize =
158-
159- let scan =
160- <@ fun ( ndRange : Range1D ) inputArrayLength verticesLength ( resultBuffer : ClArray < int >) ( verticesBuffer : ClArray < int >) ( totalSumBuffer : ClCell < int >) ->
161-
162- let resultLocalBuffer = localArray< int> workGroupSize
163- let i = ndRange.GlobalID0
164- let localID = ndRange.LocalID0
165-
166- if i < inputArrayLength then
167- resultLocalBuffer.[ localID] <- resultBuffer.[ i]
168- else
169- resultLocalBuffer.[ localID] <- 0
170-
171- let mutable step = 2
172-
173- while step <= workGroupSize do
174- barrierLocal ()
175-
176- if localID < workGroupSize / step then
177- let i = step * ( localID + 1 ) - 1
178-
179- resultLocalBuffer.[ i] <-
180- resultLocalBuffer.[ i]
181- + resultLocalBuffer.[ i - ( step >>> 1 )]
182-
183- step <- step <<< 1
184-
185- barrierLocal ()
186-
187- if localID = workGroupSize - 1 then
188- if verticesLength <= 1 && localID = i then
189- totalSumBuffer.Value <- resultLocalBuffer.[ localID]
190-
191- verticesBuffer.[ i / workGroupSize] <- resultLocalBuffer.[ localID]
192- resultLocalBuffer.[ localID] <- 0
193-
194- step <- workGroupSize
195-
196- while step > 1 do
197- barrierLocal ()
198-
199- if localID < workGroupSize / step then
200- let i = step * ( localID + 1 ) - 1
201- let j = i - ( step >>> 1 )
202-
203- let tmp = resultLocalBuffer.[ i]
204- resultLocalBuffer.[ i] <- resultLocalBuffer.[ i] + resultLocalBuffer.[ j]
205- resultLocalBuffer.[ j] <- tmp
206-
207- step <- step >>> 1
208-
209- barrierLocal ()
210-
211- if i < inputArrayLength then
212- resultBuffer.[ i] <- resultLocalBuffer.[ localID] @>
213-
214- let kernel = clContext.Compile( scan)
215-
216- fun ( processor : MailboxProcessor < _ >) ( inputArray : ClArray < int >) ( inputArrayLength : int ) ( vertices : ClArray < int >) ( verticesLength : int ) ( totalSum : ClCell < int >) ->
217- let ndRange =
218- Range1D.CreateValid( inputArrayLength, workGroupSize)
219-
220- let kernel = kernel.GetKernel()
221-
222- processor.Post(
223- Msg.MsgSetArguments
224- ( fun () -> kernel.KernelFunc ndRange inputArrayLength verticesLength inputArray vertices totalSum)
225- )
226-
227- processor.Post( Msg.CreateRunMsg<_, _> kernel)
228-
229130 /// <summary >
230131 /// Exclude inplace prefix sum.
231132 /// </summary >
232133 /// <example >
233134 /// <code >
234- /// let arr = [ | 1; 2; 3 |]
135+ /// let arr = [ | 1; 1; 1; 1 |]
235136 /// let sum = [ | 0 |]
236- /// opencl { do! runExcludeInplace arr sum }
137+ /// runExcludeInplace clContext workGroupSize processor arr sum <@ (+) @> 0
138+ /// |> ignore
237139 /// ...
238- /// > val arr = [ | 0; 1; 3 |]
239- /// > val sum = [ | 6 |]
140+ /// > val arr = [ | 0; 1; 2; 3 |]
141+ /// > val sum = [ | 4 |]
240142 /// </code>
241143 /// </example>
242144 ///<param name="clContext"> .</param >
243145 ///<param name="workGroupSize"> Should be a power of 2 and greater than 1.</param >
244- let prefixSumExcludeInplace ( clContext : ClContext ) workGroupSize =
245-
246- let scan = scan clContext workGroupSize
247- let update = update clContext
248-
249- fun ( processor : MailboxProcessor < _ >) ( inputArray : ClArray < int >) ( totalSum : ClCell < int >) ->
250- let firstVertices =
251- clContext.CreateClArray< int>(
252- ( inputArray.Length - 1 ) / workGroupSize + 1 ,
253- hostAccessMode = HostAccessMode.NotAccessible,
254- allocationMode = AllocationMode.Default
255- )
256-
257- let secondVertices =
258- clContext.CreateClArray< int>(
259- ( firstVertices.Length - 1 ) / workGroupSize + 1 ,
260- hostAccessMode = HostAccessMode.NotAccessible,
261- allocationMode = AllocationMode.Default
262- )
263-
264- let mutable verticesArrays = firstVertices, secondVertices
265- let swap ( a , b ) = ( b, a)
266- let mutable verticesLength = firstVertices.Length
267- let mutable bunchLength = workGroupSize
268-
269- scan processor inputArray inputArray.Length ( fst verticesArrays) verticesLength totalSum
270-
271- while verticesLength > 1 do
272- let fstVertices = fst verticesArrays
273- let sndVertices = snd verticesArrays
274-
275- scan
276- processor
277- fstVertices
278- verticesLength
279- sndVertices
280- (( verticesLength - 1 ) / workGroupSize + 1 )
281- totalSum
282-
283- update processor workGroupSize inputArray inputArray.Length fstVertices bunchLength
284- bunchLength <- bunchLength * workGroupSize
285- verticesArrays <- swap verticesArrays
286- verticesLength <- ( verticesLength - 1 ) / workGroupSize + 1
287-
288- processor.Post( Msg.CreateFreeMsg( firstVertices))
289- processor.Post( Msg.CreateFreeMsg( secondVertices))
290-
291- inputArray, totalSum
292-
293- ///<param name =" clContext " >.</param >
294- ///<param name =" workGroupSize " >Should be a power of 2 and greater than 1.</param >
295- let prefixSumExclude ( clContext : ClContext ) workGroupSize =
296-
297- let copy = copy clContext workGroupSize
298-
299- let prefixSumExcludeInplace =
300- prefixSumExcludeInplace clContext workGroupSize
301-
302- fun ( processor : MailboxProcessor < _ >) ( inputArray : ClArray < int >) ->
303- let copiedArray = copy processor inputArray
304-
305- let totalSum = clContext.CreateClCell 0
306- prefixSumExcludeInplace processor copiedArray totalSum
146+ ///<param name="processor"> .</param >
147+ ///<param name="inputArray"> .</param >
148+ ///<param name="totalSum"> .</param >
149+ ///<param name="plus"> Associative binary operation.</param >
150+ ///<param name="zero"> Zero element for binary operation.</param >
151+ let prefixSumExcludeInplace = PrefixSum.runExcludeInplace
307152
153+ /// <summary >
154+ /// Include inplace prefix sum.
155+ /// </summary >
156+ /// <example >
157+ /// <code >
158+ /// let arr = [ | 1; 1; 1; 1 |]
159+ /// let sum = [ | 0 |]
160+ /// runExcludeInplace clContext workGroupSize processor arr sum <@ (+) @> 0
161+ /// |> ignore
162+ /// ...
163+ /// > val arr = [ | 1; 2; 3; 4 |]
164+ /// > val sum = [ | 4 |]
165+ /// </code>
166+ /// </example>
308167 ///<param name="clContext"> .</param >
309168 ///<param name="workGroupSize"> Should be a power of 2 and greater than 1.</param >
310- let prefixSumInclude ( clContext : ClContext ) workGroupSize =
311-
312- let kernel =
313- <@ fun ( range : Range1D ) ( inputArray : ClArray < int >) inputArrayLength ( totalSum : ClCell < int >) ( outputArray : ClArray < int >) ->
169+ ///<param name="processor"> .</param >
170+ ///<param name="inputArray"> .</param >
171+ ///<param name="totalSum"> .</param >
172+ ///<param name="plus"> Associative binary operation.</param >
173+ ///<param name="zero"> Zero element for binary operation.</param >
174+ let prefixSumIncludeInplace = PrefixSum.runIncludeInplace
314175
315- let gid = range.GlobalID0
176+ let prefixSumExclude plus ( clContext : ClContext ) workGroupSize =
316177
317- if gid = inputArrayLength - 1 then
318- outputArray.[ gid] <- totalSum.Value
319- elif gid < inputArrayLength - 1 then
320- outputArray.[ gid] <- inputArray.[ gid + 1 ] @>
178+ let runExcludeInplace =
179+ prefixSumExcludeInplace plus clContext workGroupSize
321180
322- let kernel = clContext.Compile( kernel)
323181 let copy = copy clContext workGroupSize
324182
325- let prefixSumExcludeInplace =
326- prefixSumExcludeInplace clContext workGroupSize
183+ fun ( processor : MailboxProcessor < _ >) ( inputArray : ClArray < 'a >) ( totalSum : ClCell < 'a >) ( zero : 'a ) ->
327184
328- fun ( processor : MailboxProcessor < _ >) ( inputArray : ClArray < 'a >) ->
329- let copiedArray = copy processor inputArray
330- let inputArrayLength = inputArray.Length
331- let totalSum = clContext.CreateClCell 0
185+ let outputArray = copy processor inputArray
332186
333- let _ , totalSum =
334- prefixSumExcludeInplace processor copiedArray totalSum
187+ runExcludeInplace processor outputArray totalSum zero
335188
336- let outputArray =
337- clContext.CreateClArray( inputArrayLength, allocationMode = AllocationMode.Default)
189+ let prefixSumInclude plus ( clContext : ClContext ) workGroupSize =
338190
339- let ndRange =
340- Range1D.CreateValid ( inputArrayLength , workGroupSize)
191+ let runIncludeInplace =
192+ prefixSumIncludeInplace plus clContext workGroupSize
341193
342- let kernel = kernel.GetKernel ()
194+ let copy = copy clContext workGroupSize
343195
344- processor.Post(
345- Msg.MsgSetArguments
346- ( fun () -> kernel.KernelFunc ndRange copiedArray inputArrayLength totalSum outputArray)
347- )
196+ fun ( processor : MailboxProcessor < _ >) ( inputArray : ClArray < 'a >) ( totalSum : ClCell < 'a >) ( zero : 'a ) ->
348197
349- processor.Post ( Msg.CreateRunMsg <_, _> kernel )
198+ let outputArray = copy processor inputArray
350199
351- processor.Post ( Msg.CreateFreeMsg ( copiedArray ))
200+ runIncludeInplace processor outputArray totalSum zero
352201
353- outputArray, totalSum
202+ let prefixSumBackwardsExcludeInplace plus =
203+ PrefixSum.runBackwardsExcludeInplace plus
354204
205+ let prefixSumBackwardsIncludeInplace plus =
206+ PrefixSum.runBackwardsIncludeInplace plus
355207
356208 let getUniqueBitmap ( clContext : ClContext ) =
357209
@@ -391,7 +243,6 @@ module ClArray =
391243
392244 bitmap
393245
394-
395246 let setPositions ( clContext : ClContext ) =
396247
397248 let setPositions =
@@ -431,20 +282,26 @@ module ClArray =
431282
432283 let setPositions = setPositions clContext
433284 let getUniqueBitmap = getUniqueBitmap clContext
434- let prefixSumExclude = prefixSumExclude clContext workGroupSize
285+
286+ let prefixSumExclude =
287+ prefixSumExclude <@ (+) @> clContext workGroupSize
435288
436289 fun ( processor : MailboxProcessor < _ >) ( inputArray : ClArray < 'a >) ->
437290
438291 let bitmap =
439292 getUniqueBitmap processor workGroupSize inputArray
440293
441- let ( positions , sum ) = prefixSumExclude processor bitmap
294+ let sum = clContext.CreateClCell 0
295+
296+ let positions , sum = prefixSumExclude processor bitmap sum 0
442297
443298 let resultLength =
444299 let a = [| 0 |]
445300
446- let _ =
447- processor.PostAndReply( fun ch -> Msg.CreateToHostMsg( sum, a, ch))
301+ processor.PostAndReply( fun ch -> Msg.CreateToHostMsg( sum, a, ch))
302+ |> ignore
303+
304+ processor.Post( Msg.CreateFreeMsg<_>( sum))
448305
449306 a.[ 0 ]
450307
0 commit comments