@@ -315,7 +315,7 @@ module Reduce =
315315
316316 processor.Post( Msg.CreateRunMsg<_, _>( kernel))
317317
318- reducedKeys , reducedValues
318+ reducedValues , reducedKeys
319319
320320 /// <summary >
321321 /// Reduces values by key. Each segment is reduced by one work item.
@@ -381,7 +381,7 @@ module Reduce =
381381
382382 processor.Post( Msg.CreateRunMsg<_, _>( kernel))
383383
384- reducedKeys , reducedValues
384+ reducedValues , reducedKeys
385385
386386 /// <summary >
387387 /// Reduces values by key. One work group participates in the reduction.
@@ -470,8 +470,120 @@ module Reduce =
470470
471471 processor.Post( Msg.CreateRunMsg<_, _>( kernel))
472472
473- reducedKeys , reducedValues
473+ reducedValues , reducedKeys
474474
475+ /// <summary >
476+ /// Reduces values by key. Each segment is reduced by one work item.
477+ /// </summary >
478+ /// <param name =" clContext " >ClContext.</param >
479+ /// <param name =" workGroupSize " >Work group size.</param >
480+ /// <param name =" reduceOp " >Operation for reducing values.</param >
481+ /// <remarks >
482+ /// The length of the result must be calculated in advance.
483+ /// </remarks >
484+ let segmentSequentialOption < 'a > ( clContext : ClContext ) workGroupSize ( reduceOp : Expr < 'a -> 'a -> 'a option >) =
485+
486+ let kernel =
487+ <@ fun ( ndRange : Range1D ) uniqueKeyCount keysLength ( offsets : ClArray < int >) ( keys : ClArray < int >) ( values : ClArray < 'a >) ( reducedValues : ClArray < 'a >) ( firstReducedKeys : ClArray < int >) ( resultPositions : ClArray < int >) ->
488+
489+ let gid = ndRange.GlobalID0
490+
491+ if gid < uniqueKeyCount then
492+ let startPosition = offsets.[ gid]
493+
494+ let firstSourceKey = keys.[ startPosition]
495+
496+ let mutable sum = Some values.[ startPosition]
497+
498+ let mutable currentPosition = startPosition + 1
499+
500+ while currentPosition < keysLength
501+ && firstSourceKey = keys.[ currentPosition] do
502+
503+ match sum with
504+ | Some value ->
505+ let result =
506+ ((% reduceOp) value values.[ currentPosition]) // brahma error
507+
508+ sum <- result
509+ | None -> sum <- Some values.[ currentPosition]
510+
511+ currentPosition <- currentPosition + 1
512+
513+ match sum with
514+ | Some value ->
515+ reducedValues.[ gid] <- value
516+ resultPositions.[ gid] <- 1
517+ | None -> resultPositions.[ gid] <- 0
518+
519+ firstReducedKeys.[ gid] <- firstSourceKey @>
520+
521+ let kernel = clContext.Compile kernel
522+
523+ let scatterData =
524+ Scatter.lastOccurrence clContext workGroupSize
525+
526+ let scatterIndices =
527+ Scatter.lastOccurrence clContext workGroupSize
528+
529+ let prefixSum =
530+ PrefixSum.standardExcludeInplace clContext workGroupSize
531+
532+ fun ( processor : MailboxProcessor < _ >) allocationMode ( resultLength : int ) ( offsets : ClArray < int >) ( keys : ClArray < int >) ( values : ClArray < 'a >) ->
533+
534+ let reducedValues =
535+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
536+
537+ let reducedKeys =
538+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
539+
540+ let resultPositions =
541+ clContext.CreateClArrayWithSpecificAllocationMode( DeviceOnly, resultLength)
542+
543+ let ndRange =
544+ Range1D.CreateValid( resultLength, workGroupSize)
545+
546+ let kernel = kernel.GetKernel()
547+
548+ processor.Post(
549+ Msg.MsgSetArguments
550+ ( fun () ->
551+ kernel.KernelFunc
552+ ndRange
553+ resultLength
554+ keys.Length
555+ offsets
556+ keys
557+ values
558+ reducedValues
559+ reducedKeys
560+ resultPositions)
561+ )
562+
563+ processor.Post( Msg.CreateRunMsg<_, _>( kernel))
564+
565+ let resultLength =
566+ ( prefixSum processor resultPositions)
567+ .ToHostAndFree processor
568+
569+ if resultLength = 0 then None
570+ else
571+ let resultValues =
572+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
573+
574+ scatterData processor resultPositions reducedValues resultValues
575+
576+ reducedValues.Free processor
577+
578+ let resultKeys =
579+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
580+
581+ scatterIndices processor resultPositions reducedKeys resultKeys // TODO(mb error)
582+
583+ reducedKeys.Free processor
584+ resultPositions.Free processor
585+
586+ Some ( resultValues, reducedKeys)
475587 module ByKey2D =
476588 /// <summary >
477589 /// Reduce an array of values by 2D keys using a single work item.
@@ -550,7 +662,7 @@ module Reduce =
550662
551663 processor.Post( Msg.CreateRunMsg<_, _>( kernel))
552664
553- firstReducedKeys , secondReducedKeys , reducedValues
665+ reducedValues , firstReducedKeys , secondReducedKeys
554666
555667 /// <summary >
556668 /// Reduces values by key. Each segment is reduced by one work item.
@@ -625,7 +737,7 @@ module Reduce =
625737
626738 processor.Post( Msg.CreateRunMsg<_, _>( kernel))
627739
628- firstReducedKeys , secondReducedKeys , reducedValues
740+ reducedValues , firstReducedKeys , secondReducedKeys
629741
630742 /// <summary >
631743 /// Reduces values by key. Each segment is reduced by one work item.
@@ -729,27 +841,29 @@ module Reduce =
729841 ( prefixSum processor resultPositions)
730842 .ToHostAndFree processor
731843
732- let resultValues =
733- clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
844+ if resultLength = 0 then None
845+ else
846+ let resultValues =
847+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
734848
735- scatterData processor resultPositions reducedValues resultValues
849+ scatterData processor resultPositions reducedValues resultValues
736850
737- reducedValues.Free processor
851+ reducedValues.Free processor
738852
739- let resultFirstKeys =
740- clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
853+ let resultFirstKeys =
854+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
741855
742- scatterIndices processor resultPositions firstReducedKeys resultFirstKeys
856+ scatterIndices processor resultPositions firstReducedKeys resultFirstKeys
743857
744- firstReducedKeys.Free processor
858+ firstReducedKeys.Free processor
745859
746- let resultSecondKeys =
747- clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
860+ let resultSecondKeys =
861+ clContext.CreateClArrayWithSpecificAllocationMode( allocationMode, resultLength)
748862
749- scatterIndices processor resultPositions secondReducedKeys resultSecondKeys
863+ scatterIndices processor resultPositions secondReducedKeys resultSecondKeys
750864
751- secondReducedKeys.Free processor
865+ secondReducedKeys.Free processor
752866
753- resultPositions.Free processor
867+ resultPositions.Free processor
754868
755- resultFirstKeys , resultSecondKeys , resultValues
869+ Some ( resultValues , resultFirstKeys , resultSecondKeys )
0 commit comments