@@ -184,16 +184,16 @@ module Expand =
184184
185185 fun ( processor : MailboxProcessor < _ >) ( values : ClArray < 'a >) ( columns : Indices ) ( rows : Indices ) ->
186186 // sort by columns
187- let valuesSortedByColumns = sortByKeyValues processor columns values
187+ let valuesSortedByColumns = sortByKeyValues processor DeviceOnly columns values
188188
189- let rowsSortedByColumns = sortByKeyIndices processor columns rows
189+ let rowsSortedByColumns = sortByKeyIndices processor DeviceOnly columns rows
190190
191191 let sortedColumns = sortKeys processor columns
192192
193193 // sort by rows
194- let valuesSortedByRows = sortByKeyValues processor rows valuesSortedByColumns
194+ let valuesSortedByRows = sortByKeyValues processor DeviceOnly rows valuesSortedByColumns
195195
196- let columnsSortedByRows = sortByKeyIndices processor rows sortedColumns
196+ let columnsSortedByRows = sortByKeyIndices processor DeviceOnly rows sortedColumns
197197
198198 let sortedRows = sortKeys processor rowsSortedByColumns
199199
@@ -208,21 +208,36 @@ module Expand =
208208 let reduce = Reduce.ByKey2D.segmentSequential clContext workGroupSize opAdd
209209
210210 let getUniqueBitmap =
211- ClArray.getUniqueBitmap2 clContext workGroupSize
211+ ClArray.getUniqueBitmap2FirstOccurrence clContext workGroupSize
212212
213213 let prefixSum = PrefixSum.standardExcludeInplace clContext workGroupSize
214214
215- let removeDuplicates = ClArray.removeDuplications clContext workGroupSize
215+ let init = ClArray.init clContext workGroupSize Map.id // TODO(fuse)
216+
217+ let scatter = Scatter.runInplace clContext workGroupSize
216218
217219 fun ( processor : MailboxProcessor < _ >) allocationMode ( values : ClArray < 'a >) ( columns : Indices ) ( rows : Indices ) ->
218220
219221 let bitmap = getUniqueBitmap processor DeviceOnly columns rows
220222
223+ printfn $" key bitmap: %A {bitmap.ToHost processor}"
224+
221225 let uniqueKeysCount = ( prefixSum processor bitmap) .ToHostAndFree processor
222226
223- let offsets = removeDuplicates processor bitmap
227+ printfn $" key bitmap after prefix sum: %A {bitmap.ToHost processor}"
228+
229+ let positions = init processor DeviceOnly bitmap.Length
230+
231+ printfn $" positions: %A {positions.ToHost processor}"
232+
233+ let offsets = clContext.CreateClArrayWithSpecificAllocationMode( DeviceOnly, uniqueKeysCount)
234+
235+ scatter processor bitmap positions offsets
236+
237+ printfn $" offsets: %A {offsets.ToHost processor}"
224238
225239 bitmap.Free processor
240+ positions.Free processor
226241
227242 let reducedColumns , reducedRows , reducedValues =
228243 reduce processor allocationMode uniqueKeysCount offsets columns rows values
@@ -231,7 +246,7 @@ module Expand =
231246
232247 reducedValues, reducedColumns, reducedRows
233248
234- let run ( clContext : ClContext ) workGroupSize opMul opAdd =
249+ let run ( clContext : ClContext ) workGroupSize opAdd opMul =
235250
236251 let getSegmentPointers = getSegmentPointers clContext workGroupSize
237252
@@ -248,18 +263,31 @@ module Expand =
248263 let values , columns , rows =
249264 expand processor length segmentPointers leftMatrix rightMatrix
250265
266+ printfn $" expanded values: %A {values.ToHost processor}"
267+ printfn $" expanded columns: %A {columns.ToHost processor}"
268+ printfn $" expanded rows: %A {rows.ToHost processor}"
269+
251270 let sortedValues , sortedColumns , sortedRows =
252271 sort processor values columns rows
253272
273+ printfn $" sorted values: %A {sortedValues.ToHost processor}"
274+ printfn $" sorted columns: %A {sortedColumns.ToHost processor}"
275+ printfn $" sorted rows: %A {sortedRows.ToHost processor}"
276+
254277 values.Free processor
255278 columns.Free processor
256279 rows.Free processor
257280
258281 let reducedValues , reducedColumns , reducedRows =
259282 reduce processor allocationMode sortedValues sortedColumns sortedRows
260283
284+ printfn $" reduced values: %A {reducedValues.ToHost processor}"
285+ printfn $" reduced columns: %A {reducedColumns.ToHost processor}"
286+ printfn $" reduced rows: %A {reducedRows.ToHost processor}"
287+
261288 sortedValues.Free processor
262289 sortedColumns.Free processor
263290 sortedRows.Free processor
264291
265292 reducedValues, reducedColumns, reducedRows
293+
0 commit comments