namespace GraphBLAS.FSharp.Backend.COOVector.Utilities.AssignSubVector

open Brahma.OpenCL
open Brahma.FSharp.OpenCL.WorkflowBuilder.Basic
open Brahma.FSharp.OpenCL.WorkflowBuilder.Evaluation
open GraphBLAS.FSharp.Backend.Common

[<AutoOpen>]
module internal Filter =
    /// Merges the (index, value) pairs of the left and right inputs into single
    /// arrays ordered by index, using one OpenCL kernel launch.
    ///
    /// Parameters:
    ///   leftIndices / leftValues   - indices and values of the left input.
    ///   rightIndices / rightValues - indices and values of the right input.
    ///   bitmap                     - read at the position of each right element
    ///                                that is emitted.
    ///
    /// Returns (resultIndices, resultValues, rawPositions), each of length
    /// leftValues.Length + rightIndices.Length:
    ///   * resultIndices.[i] is the index of the i-th merged element; when a left
    ///     and a right index are equal the left element is emitted first.
    ///   * resultValues.[i] is the matching value. For a right element whose
    ///     bitmap entry is false the value is NOT written and stays
    ///     Unchecked.defaultof<'a>.
    ///   * rawPositions.[i] starts at 1 and is set to 0 only for right elements
    ///     whose bitmap entry is false.
    ///
    /// NOTE(review): the binary searches below only make sense if leftIndices and
    /// rightIndices are each sorted in ascending order, and the code presumes
    /// leftIndices.Length = leftValues.Length, rightIndices.Length =
    /// rightValues.Length = bitmap.Length - confirm against callers.
    let filter (leftIndices: int[]) (leftValues: 'a[]) (rightIndices: int[]) (rightValues: 'a[]) (bitmap: bool[]) : OpenCLEvaluation<int[] * 'a[] * int[]> = opencl {
        let workGroupSize = Utils.workGroupSize
        let firstSide = leftValues.Length
        let secondSide = rightIndices.Length
        let sumOfSides = firstSide + secondSide

        let merge =
            <@
                fun (ndRange: _1D)
                    (firstIndicesBuffer: int[])
                    (firstValuesBuffer: 'a[])
                    (secondIndicesBuffer: int[])
                    (secondValuesBuffer: 'a[])
                    (bitmapBuffer: bool[])
                    (allIndicesBuffer: int[])
                    (allValuesBuffer: 'a[])
                    (rawPositionsBuffer: int[]) ->

                    let i = ndRange.GlobalID0

                    // Stage 1: work-items 0 and 1 of every work-group find how many
                    // left elements precede the group's output window (beginIdx) and
                    // how many precede its end (endIdx) by a binary search along the
                    // corresponding diagonal of the two inputs.
                    let mutable beginIdxLocal = local ()
                    let mutable endIdxLocal = local ()
                    let localID = ndRange.LocalID0
                    if localID < 2 then
                        // localID = 0 -> diagonal just before the window start;
                        // localID = 1 -> diagonal of the window's last element.
                        let mutable x = localID * (workGroupSize - 1) + i - 1
                        // The last group may be only partially filled: clamp to the final element.
                        if x >= sumOfSides then x <- sumOfSides - 1
                        let diagonalNumber = x

                        let mutable leftEdge = diagonalNumber + 1 - secondSide
                        if leftEdge < 0 then leftEdge <- 0

                        let mutable rightEdge = firstSide - 1
                        if rightEdge > diagonalNumber then rightEdge <- diagonalNumber

                        while leftEdge <= rightEdge do
                            let middleIdx = (leftEdge + rightEdge) / 2
                            let firstIndex = firstIndicesBuffer.[middleIdx]
                            let secondIndex = secondIndicesBuffer.[diagonalNumber - middleIdx]
                            // "<=" makes equal indices resolve in favour of the left input.
                            if firstIndex <= secondIndex then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1

                        // Here localID equals either 0 or 1
                        if localID = 0 then beginIdxLocal <- leftEdge else endIdxLocal <- leftEdge
                    barrier ()

                    let beginIdx = beginIdxLocal
                    let endIdx = endIdxLocal
                    let firstLocalLength = endIdx - beginIdx
                    let mutable x = workGroupSize - firstLocalLength
                    // When the left input is exhausted inside this window, the right part
                    // is whatever is left of the right input from the window start
                    // (i - localID is the first global id of the group).
                    if endIdx = firstSide then x <- secondSide - i + localID + beginIdx
                    // The left input can also run out in a group that is NOT the last one;
                    // the remainder of the right input is then larger than the window.
                    // Clamp so that firstLocalLength + secondLocalLength never exceeds
                    // workGroupSize, the size of localIndices.
                    if x > workGroupSize - firstLocalLength then x <- workGroupSize - firstLocalLength
                    let secondLocalLength = x

                    //First indices are from 0 to firstLocalLength - 1 inclusive
                    //Second indices are from firstLocalLength to firstLocalLength + secondLocalLength - 1 inclusive
                    let localIndices = localArray<int> workGroupSize

                    // Stage 2: copy this group's slice of both index arrays into localIndices.
                    if localID < firstLocalLength then
                        localIndices.[localID] <- firstIndicesBuffer.[beginIdx + localID]
                    if localID < secondLocalLength then
                        // i - beginIdx = (start of the group's right slice) + localID
                        localIndices.[firstLocalLength + localID] <- secondIndicesBuffer.[i - beginIdx]
                    barrier ()

                    // Stage 3: every work-item inside the output range repeats the diagonal
                    // search on the staged slice and emits exactly one merged element.
                    if i < sumOfSides then
                        let mutable leftEdge = localID + 1 - secondLocalLength
                        if leftEdge < 0 then leftEdge <- 0

                        let mutable rightEdge = firstLocalLength - 1
                        if rightEdge > localID then rightEdge <- localID

                        while leftEdge <= rightEdge do
                            let middleIdx = (leftEdge + rightEdge) / 2
                            let firstIndex = localIndices.[middleIdx]
                            let secondIndex = localIndices.[firstLocalLength + localID - middleIdx]
                            if firstIndex <= secondIndex then leftEdge <- middleIdx + 1 else rightEdge <- middleIdx - 1

                        // Candidates for output position i: the last left element taken so far
                        // (boundaryX) and the last right element taken so far (boundaryY).
                        let boundaryX = rightEdge
                        let boundaryY = localID - leftEdge

                        // boundaryX and boundaryY can't be off the right edge of array (only off the left edge)
                        let isValidX = boundaryX >= 0
                        let isValidY = boundaryY >= 0

                        let mutable fstIdx = 0
                        if isValidX then fstIdx <- localIndices.[boundaryX]

                        let mutable sndIdx = 0
                        if isValidY then sndIdx <- localIndices.[firstLocalLength + boundaryY]

                        // The larger candidate is the element at position i; on a tie the
                        // right one is emitted here because the left one was emitted earlier.
                        if not isValidX || isValidY && fstIdx <= sndIdx then
                            allIndicesBuffer.[i] <- sndIdx
                            // i - localID - beginIdx + boundaryY is the element's position in the right input.
                            if bitmapBuffer.[i - localID - beginIdx + boundaryY] then
                                allValuesBuffer.[i] <- secondValuesBuffer.[i - localID - beginIdx + boundaryY]
                            else
                                // Flag the slot with 0; its value is left at the default.
                                rawPositionsBuffer.[i] <- 0
                        else
                            allIndicesBuffer.[i] <- fstIdx
                            allValuesBuffer.[i] <- firstValuesBuffer.[beginIdx + boundaryX]
            @>

        let resultValues = Array.create sumOfSides Unchecked.defaultof<'a>
        let resultIndices = Array.zeroCreate sumOfSides
        // Every slot starts flagged with 1; the kernel only ever resets flags to 0.
        let rawPositions = Array.create sumOfSides 1

        do! RunCommand merge <| fun kernelPrepare ->
            let ndRange = _1D(Utils.workSize sumOfSides, workGroupSize)
            kernelPrepare
                ndRange
                leftIndices
                leftValues
                rightIndices
                rightValues
                bitmap
                resultIndices
                resultValues
                rawPositions

        return resultIndices, resultValues, rawPositions
    }