@@ -7,7 +7,7 @@ open GraphBLAS.FSharp
77open GraphBLAS.FSharp .Backend .Common
88
99module internal Merge =
10- let run ( matrixLeft : COOFormat < 'a >) ( matrixRight : COOFormat < 'a >) ( mask : Mask2D option ) : OpenCLEvaluation < int [] * int [] * 'a []> = opencl {
10+ let runM ( matrixLeft : COOFormat < 'a >) ( matrixRight : COOFormat < 'a >) ( mask : Mask2D option ) : OpenCLEvaluation < int [] * int [] * 'a []> = opencl {
1111 let workGroupSize = Utils.workGroupSize
1212 let firstSide = matrixLeft.Values.Length
1313 let secondSide = matrixRight.Values.Length
@@ -125,3 +125,113 @@ module internal Merge =
125125
126126 return allRows, allColumns, allValues
127127 }
128+
// runV: merges two (index, value) vectors into a single pair of arrays whose length is
// leftValues.Length + rightValues.Length, by launching the `merge` kernel below once.
// Returns (allIndices, allValues) wrapped in the opencl computation expression.
//   leftIndices / leftValues   - first input vector (indices and the values at those indices)
//   rightIndices / rightValues - second input vector
//   mask                       - not referenced anywhere in this body
// NOTE(review): the binary searches below presumably require both index arrays to be sorted
// ascending - confirm against callers.
// NOTE(review): elements with equal indices are not combined here; each work-item with
// i < sumOfSides writes exactly one output element.
129+ let runV ( leftIndices : int []) ( leftValues : 'a []) ( rightIndices : int []) ( rightValues : 'a []) ( mask : Mask1D option ) : OpenCLEvaluation < int [] * 'a []> = opencl {
130+ let workGroupSize = Utils.workGroupSize
// Sizes of the two inputs are taken from the value arrays, not the index arrays.
131+ let firstSide = leftValues.Length
132+ let secondSide = rightValues.Length
133+ let sumOfSides = firstSide + secondSide
134+
// Kernel quotation: one work-item per output position.
135+ let merge =
136+ <@
137+ fun ( ndRange : _1D )
138+ ( firstIndicesBuffer : int [])
139+ ( firstValuesBuffer : 'a [])
140+ ( secondIndicesBuffer : int [])
141+ ( secondValuesBuffer : 'a [])
142+ ( allIndicesBuffer : int [])
143+ ( allValuesBuffer : 'a []) ->
144+
// i is the global id of this work-item and the output slot it writes.
145+ let i = ndRange.GlobalID0
146+
// Step 1: work-items with localID 0 and 1 each run a binary search over the global
// index buffers; the results are published through these two local() values.
// (presumably work-group-shared memory - confirm against the Brahma.FSharp docs)
147+ let mutable beginIdxLocal = local ()
148+ let mutable endIdxLocal = local ()
149+ let localID = ndRange.LocalID0
150+ if localID < 2 then
// localID 0 searches diagonal i - 1; localID 1 searches (workGroupSize - 1) + i - 1.
// The diagonal is clamped to the last valid one, sumOfSides - 1.
151+ let mutable x = localID * ( workGroupSize - 1 ) + i - 1
152+ if x >= sumOfSides then x <- sumOfSides - 1
153+ let diagonalNumber = x
154+
// Search range in the first buffer: lower bound clamped to 0, upper bound clamped to
// min(firstSide - 1, diagonalNumber), so that diagonalNumber - middleIdx stays a
// non-negative offset into the second buffer.
155+ let mutable leftEdge = diagonalNumber + 1 - secondSide
156+ if leftEdge < 0 then leftEdge <- 0
157+
158+ let mutable rightEdge = firstSide - 1
159+ if rightEdge > diagonalNumber then rightEdge <- diagonalNumber
160+
// Binary search along the diagonal: compares first[middleIdx] with
// second[diagonalNumber - middleIdx]; equal indices move rightEdge (the else branch).
161+ while leftEdge <= rightEdge do
162+ let middleIdx = ( leftEdge + rightEdge) / 2
163+ let firstIndex = firstIndicesBuffer.[ middleIdx]
164+ let secondIndex = secondIndicesBuffer.[ diagonalNumber - middleIdx]
165+ if firstIndex < secondIndex then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
166+
167+ // Here localID equals either 0 or 1
168+ if localID = 0 then beginIdxLocal <- leftEdge else endIdxLocal <- leftEdge
// The two local values are only read after this barrier.
169+ barrier ()
170+
// Step 2: derive how many elements of each input this work-group handles.
171+ let beginIdx = beginIdxLocal
172+ let endIdx = endIdxLocal
173+ let firstLocalLength = endIdx - beginIdx
// Default: the second input fills the rest of the work-group. When the search ended at
// firstSide, the length is instead secondSide - i + localID + beginIdx.
// NOTE(review): i - localID is presumably the first global id of this work-group, making
// this "what is left of the second input from this group's start". It is not clamped to
// workGroupSize - firstLocalLength here - verify that the store at
// localIndices.[firstLocalLength + localID] below cannot exceed workGroupSize when the
// first input is exhausted before the last work-group.
174+ let mutable x = workGroupSize - firstLocalLength
175+ if endIdx = firstSide then x <- secondSide - i + localID + beginIdx
176+ let secondLocalLength = x
177+
178+ //First indices are from 0 to firstLocalLength - 1 inclusive
179+ //Second indices are from firstLocalLength to firstLocalLength + secondLocalLength - 1 inclusive
180+ let localIndices = localArray< int> workGroupSize
181+
// Each work-item copies at most one index from each input into localIndices.
182+ if localID < firstLocalLength then
183+ localIndices.[ localID] <- firstIndicesBuffer.[ beginIdx + localID]
184+ if localID < secondLocalLength then
185+ localIndices.[ firstLocalLength + localID] <- secondIndicesBuffer.[ i - beginIdx]
// localIndices is only read after this barrier.
186+ barrier ()
187+
// Step 3: work-items with i >= sumOfSides write nothing.
// (presumably Utils.workSize rounds the global size up - confirm)
188+ if i < sumOfSides then
// Same diagonal search as above, but over localIndices with localID as the diagonal.
189+ let mutable leftEdge = localID + 1 - secondLocalLength
190+ if leftEdge < 0 then leftEdge <- 0
191+
192+ let mutable rightEdge = firstLocalLength - 1
193+ if rightEdge > localID then rightEdge <- localID
194+
195+ while leftEdge <= rightEdge do
196+ let middleIdx = ( leftEdge + rightEdge) / 2
197+ let firstIndex = localIndices.[ middleIdx]
198+ let secondIndex = localIndices.[ firstLocalLength + localID - middleIdx]
199+ if firstIndex < secondIndex then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1
200+
// Candidate positions (relative to this work-group's slices) in the first and second input.
201+ let boundaryX = rightEdge
202+ let boundaryY = localID - leftEdge
203+
204+ // boundaryX and boundaryY can't be off the right edge of array (only off the left edge)
205+ let isValidX = boundaryX >= 0
206+ let isValidY = boundaryY >= 0
207+
// An invalid candidate keeps the placeholder index 0; the validity flags, not the
// placeholder, decide the branch below.
208+ let mutable fstIdx = 0
209+ if isValidX then fstIdx <- localIndices.[ boundaryX]
210+
211+ let mutable sndIdx = 0
212+ if isValidY then sndIdx <- localIndices.[ firstLocalLength + boundaryY]
213+
// Parsed as (not isValidX) || (isValidY && fstIdx < sndIdx): the element from the second
// input is written when there is no valid first candidate or the second candidate's index
// is strictly larger; otherwise (including equal indices) the first input's element is written.
214+ if not isValidX || isValidY && fstIdx < sndIdx then
215+ allIndicesBuffer.[ i] <- sndIdx
// i - localID - beginIdx + boundaryY maps the local position back to an offset
// into secondValuesBuffer.
216+ allValuesBuffer.[ i] <- secondValuesBuffer.[ i - localID - beginIdx + boundaryY]
217+ else
218+ allIndicesBuffer.[ i] <- fstIdx
219+ allValuesBuffer.[ i] <- firstValuesBuffer.[ beginIdx + boundaryX]
220+ @>
221+
// Host-side output buffers: indices start zero-filled, values start as the default of 'a.
222+ let allIndices = Array.zeroCreate sumOfSides
223+ let allValues = Array.create sumOfSides Unchecked.defaultof< 'a>
224+
// Launch the kernel over a 1D range of Utils.workSize sumOfSides work-items in groups of
// workGroupSize, binding the arguments in the order the quotation declares them.
225+ do ! RunCommand merge <| fun kernelPrepare ->
226+ let ndRange = _ 1D( Utils.workSize sumOfSides, workGroupSize)
227+ kernelPrepare
228+ ndRange
229+ leftIndices
230+ leftValues
231+ rightIndices
232+ rightValues
233+ allIndices
234+ allValues
235+
// NOTE(review): no explicit read-back of the output buffers is visible before this
// return - confirm how the surrounding opencl workflow synchronises them.
236+ return allIndices, allValues
237+ }
0 commit comments