1515import * as arrow from 'apache-arrow' ;
1616
1717import { Column } from '../column' ;
18+ import { Table } from '../table' ;
1819import {
1920 Bool8 ,
2021 Categorical ,
@@ -40,61 +41,60 @@ import {
4041import { ArrowToCUDFType } from '../types/mappings' ;
4142
/**
 * Typing overlay for the `DataToColumnVisitor` class declared below (same name, so the two
 * declarations merge). It narrows the inherited `arrow.Visitor` entry points so that visiting an
 * `arrow.Data<T>` chunk is typed as producing a `Column` of the mapped cuDF type.
 *
 * @ignore
 */
interface DataToColumnVisitor extends arrow.Visitor {
  // Resolve the per-type visit function for a chunk.
  getVisitFn<T extends arrow.DataType>(node: arrow.Data<T>): () => Column<ArrowToCUDFType<T>>;
  // Convert a single chunk.
  visit<T extends arrow.DataType>(node: arrow.Data<T>): Column<ArrowToCUDFType<T>>;
  // Convert a list of chunks, one Column per chunk.
  visitMany<T extends arrow.DataType>(nodes: ReadonlyArray<arrow.Data<T>>):
    Array<Column<ArrowToCUDFType<T>>>;
}
4850
49- class VectorToColumnVisitor extends arrow . Visitor {
50- // visitNull<T extends arrow.Null>(vector: arrow.Vector<T>) {}
51- visitBool < T extends arrow . Bool > ( vector : arrow . Vector < T > ) {
52- const { nullBitmap : nullMask } = vector . data ;
53- return new Column ( { type : new Bool8 , data : new Uint8Array ( vector ) , nullMask} ) ;
54- }
55- visitInt8 < T extends arrow . Int8 > ( { length,
56- data : { values : data , nullBitmap : nullMask } } : arrow . Vector < T > ) {
51+ class DataToColumnVisitor extends arrow . Visitor {
52+ // visitNull<T extends arrow.Null>(data: arrow.Data<T>) {}
/**
 * Converts a bit-packed Arrow Bool chunk into a Bool8 Column by expanding every bit into one
 * byte (0 or 1).
 *
 * The bit iterator starts at `data.offset` rather than bit 0: Arrow does not slice the values
 * buffer of a Bool chunk (offsets are in bits, not bytes), so a sliced chunk still points at the
 * start of the underlying buffer and only `offset` says where its first element lives.
 *
 * NOTE(review): `nullBitmap` is forwarded unchanged, as in the other visitors in this class. For
 * a chunk with a non-zero `offset` the validity bits are presumably misaligned in the same way --
 * confirm how `Column` interprets `nullMask` and realign if needed.
 *
 * @param data Arrow Bool chunk to convert.
 * @returns A Bool8 Column with one byte per element.
 */
visitBool<T extends arrow.Bool>(data: arrow.Data<T>) {
  const {values, offset, length, nullBitmap: nullMask} = data;
  // eslint-disable-next-line @typescript-eslint/unbound-method
  const bits = new arrow.util.BitIterator(values, offset, length, null, arrow.util.getBit);
  return new Column({type: new Bool8, data: new Uint8Array(bits), nullMask});
}
// Builds an Int8 Column from a view over the first `length` values of the chunk; the chunk's
// null bitmap is forwarded as the Column's null mask.
visitInt8<T extends arrow.Int8>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Int8,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
// Builds an Int16 Column from a view over the first `length` values of the chunk; the chunk's
// null bitmap is forwarded as the Column's null mask.
visitInt16<T extends arrow.Int16>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Int16,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
// Builds an Int32 Column from a view over the first `length` values of the chunk; the chunk's
// null bitmap is forwarded as the Column's null mask.
visitInt32<T extends arrow.Int32>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Int32,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
// Builds an Int64 Column from a view over the first `length * 2` entries of the values array.
// NOTE(review): the factor of two presumably reflects 64-bit values being stored as pairs of
// 32-bit words by the Arrow version in use -- confirm.
visitInt64<T extends arrow.Int64>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Int64,
    length,
    data: values.subarray(0, length * 2),
    nullMask: nullBitmap,
  });
}
// Builds a Uint8 Column from a view over the first `length` values of the chunk; the chunk's
// null bitmap is forwarded as the Column's null mask.
visitUint8<T extends arrow.Uint8>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Uint8,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
// Builds a Uint16 Column from a view over the first `length` values of the chunk; the chunk's
// null bitmap is forwarded as the Column's null mask.
visitUint16<T extends arrow.Uint16>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Uint16,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
// Builds a Uint32 Column from a view over the first `length` values of the chunk; the chunk's
// null bitmap is forwarded as the Column's null mask.
visitUint32<T extends arrow.Uint32>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Uint32,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
// Builds a Uint64 Column from a view over the first `length * 2` entries of the values array.
// NOTE(review): the factor of two presumably reflects 64-bit values being stored as pairs of
// 32-bit words by the Arrow version in use -- confirm.
visitUint64<T extends arrow.Uint64>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Uint64,
    length,
    data: values.subarray(0, length * 2),
    nullMask: nullBitmap,
  });
}
87- // visitFloat16<T extends arrow.Float16>(vector : arrow.Vector <T>) {}
88- visitFloat32 < T extends arrow . Float32 > ( { length, data : { values : data , nullBitmap : nullMask } } :
89- arrow . Vector < T > ) {
87+ // visitFloat16<T extends arrow.Float16>(data : arrow.Data <T>) {}
// Builds a Float32 Column from a view over the first `length` values of the chunk; the chunk's
// null bitmap is forwarded as the Column's null mask.
visitFloat32<T extends arrow.Float32>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Float32,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
// Builds a Float64 Column from a view over the first `length` values of the chunk; the chunk's
// null bitmap is forwarded as the Column's null mask.
visitFloat64<T extends arrow.Float64>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new Float64,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
96- visitUtf8 < T extends arrow . Utf8 > ( { length, data : { values, valueOffsets, nullBitmap : nullMask } } :
97- arrow . Vector < T > ) {
96+ visitUtf8 < T extends arrow . Utf8 > ( { length, values, valueOffsets, nullBitmap : nullMask } :
97+ arrow . Data < T > ) {
9898 return new Column ( {
9999 length,
100100 type : new Utf8String ,
@@ -112,48 +112,48 @@ class VectorToColumnVisitor extends arrow.Visitor {
112112 ]
113113 } ) ;
114114 }
115- // visitBinary<T extends arrow.Binary>(vector : arrow.Vector <T>) {}
116- // visitFixedSizeBinary<T extends arrow.FixedSizeBinary>(vector : arrow.Vector <T>) {}
117- // visitDate<T extends arrow.Date_>(vector : arrow.Vector <T>) {}
118- visitDateDay < T extends arrow . DateDay > ( { length, data : { values : data , nullBitmap : nullMask } } :
119- arrow . Vector < T > ) {
115+ // visitBinary<T extends arrow.Binary>(data : arrow.Data <T>) {}
116+ // visitFixedSizeBinary<T extends arrow.FixedSizeBinary>(data : arrow.Data <T>) {}
117+ // visitDate<T extends arrow.Date_>(data : arrow.Data <T>) {}
// Maps an Arrow DateDay chunk onto a TimestampDay Column, using a view over the first `length`
// values; the chunk's null bitmap is forwarded as the Column's null mask.
visitDateDay<T extends arrow.DateDay>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new TimestampDay,
    length,
    data: values.subarray(0, length),
    nullMask: nullBitmap,
  });
}
/**
 * Maps an Arrow DateMillisecond chunk onto a TimestampMillisecond Column.
 *
 * The values view spans `length * 2` entries, matching `visitTimestampMillisecond` and the other
 * 64-bit visitors in this class. Taking only `length` entries (as this method previously did)
 * hands the Column half of the buffer it needs for `length` 64-bit timestamps.
 * NOTE(review): this relies on DateMillisecond values being stored two 32-bit words per element,
 * like the Timestamp types handled here -- confirm against the Arrow version in use.
 *
 * @param chunk Arrow DateMillisecond chunk to convert.
 * @returns A TimestampMillisecond Column with `length` elements.
 */
visitDateMillisecond<T extends arrow.DateMillisecond>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap: nullMask} = chunk;
  return new Column(
    {type: new TimestampMillisecond, length, data: values.subarray(0, length * 2), nullMask});
}
// Builds a TimestampSecond Column from a view over the first `length * 2` entries of the values
// array; the chunk's null bitmap is forwarded as the Column's null mask.
visitTimestampSecond<T extends arrow.TimestampSecond>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new TimestampSecond,
    length,
    data: values.subarray(0, length * 2),
    nullMask: nullBitmap,
  });
}
// Builds a TimestampMillisecond Column from a view over the first `length * 2` entries of the
// values array; the chunk's null bitmap is forwarded as the Column's null mask.
visitTimestampMillisecond<T extends arrow.TimestampMillisecond>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new TimestampMillisecond,
    length,
    data: values.subarray(0, length * 2),
    nullMask: nullBitmap,
  });
}
// Builds a TimestampMicrosecond Column from a view over the first `length * 2` entries of the
// values array; the chunk's null bitmap is forwarded as the Column's null mask.
visitTimestampMicrosecond<T extends arrow.TimestampMicrosecond>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new TimestampMicrosecond,
    length,
    data: values.subarray(0, length * 2),
    nullMask: nullBitmap,
  });
}
// Builds a TimestampNanosecond Column from a view over the first `length * 2` entries of the
// values array; the chunk's null bitmap is forwarded as the Column's null mask.
visitTimestampNanosecond<T extends arrow.TimestampNanosecond>(chunk: arrow.Data<T>) {
  const {length, values, nullBitmap} = chunk;
  return new Column({
    type: new TimestampNanosecond,
    length,
    data: values.subarray(0, length * 2),
    nullMask: nullBitmap,
  });
}
147- // visitTimeSecond<T extends arrow.TimeSecond>(vector : arrow.Vector <T>) {}
148- // visitTimeMillisecond<T extends arrow.TimeMillisecond>(vector : arrow.Vector <T>) {}
149- // visitTimeMicrosecond<T extends arrow.TimeMicrosecond>(vector : arrow.Vector <T>) {}
150- // visitTimeNanosecond<T extends arrow.TimeNanosecond>(vector : arrow.Vector <T>) {}
151- // visitDecimal<T extends arrow.Decimal>(vector : arrow.Vector <T>) {}
152- visitList < T extends arrow . List > ( vector : arrow . Vector < T > ) {
153- const { type, length, data : { valueOffsets, nullBitmap : nullMask } } = vector ;
147+ // visitTimeSecond<T extends arrow.TimeSecond>(data : arrow.Data <T>) {}
148+ // visitTimeMillisecond<T extends arrow.TimeMillisecond>(data : arrow.Data <T>) {}
149+ // visitTimeMicrosecond<T extends arrow.TimeMicrosecond>(data : arrow.Data <T>) {}
150+ // visitTimeNanosecond<T extends arrow.TimeNanosecond>(data : arrow.Data <T>) {}
151+ // visitDecimal<T extends arrow.Decimal>(data : arrow.Data <T>) {}
152+ visitList < T extends arrow . List > ( data : arrow . Data < T > ) {
153+ const { type, length, valueOffsets, nullBitmap : nullMask } = data ;
154154 const offsets =
155155 new Column ( { type : new Int32 , length : length + 1 , data : valueOffsets . subarray ( 0 , length + 1 ) } ) ;
156- const elements = this . visit ( vector . getChildAt ( 0 ) as arrow . Vector < T [ 'valueType' ] > ) ;
156+ const elements = this . visit ( data . children [ 0 ] as arrow . Data < T [ 'valueType' ] > ) ;
157157 return new Column ( {
158158 length,
159159 type : new List ( type . children [ 0 ] . clone ( { type : elements . type , nullable : elements . nullable } ) ) ,
@@ -164,9 +164,9 @@ class VectorToColumnVisitor extends arrow.Visitor {
164164 ]
165165 } ) ;
166166 }
167- visitStruct < T extends arrow . Struct > ( vector : arrow . Vector < T > ) {
168- const { type, length, data : { nullBitmap : nullMask } } = vector ;
169- const children = type . children . map ( ( _ , i ) => this . visit ( vector . getChildAt ( i ) as arrow . Vector ) ) ;
167+ visitStruct < T extends arrow . Struct > ( data : arrow . Data < T > ) {
168+ const { type, length, nullBitmap : nullMask } = data ;
169+ const children = type . children . map ( ( _ , i ) => this . visit ( data . children [ i ] ) ) ;
170170 return new Column ( {
171171 length,
172172 type : new Struct ( children . map (
@@ -175,25 +175,28 @@ class VectorToColumnVisitor extends arrow.Visitor {
175175 children,
176176 } ) ;
177177 }
178- // visitDenseUnion<T extends arrow.DenseUnion>(vector : arrow.Vector <T>) {}
179- // visitSparseUnion<T extends arrow.SparseUnion>(vector : arrow.Vector <T>) {}
180- visitDictionary < T extends arrow . Dictionary > ( vector : arrow . Vector < T > ) {
181- const { type, length, data : { nullBitmap : nullMask } } = vector ;
182- const codes = this . visit ( arrow . Vector . new ( vector . data . clone ( type . indices ) ) ) . cast ( new Uint32 ) ;
178+ // visitDenseUnion<T extends arrow.DenseUnion>(data : arrow.Data <T>) {}
179+ // visitSparseUnion<T extends arrow.SparseUnion>(data : arrow.Data <T>) {}
/**
 * Converts an Arrow Dictionary chunk into a Categorical Column whose children are
 * `[codes, categories]`.
 *
 * @param data Arrow Dictionary chunk to convert.
 * @returns A Categorical Column typed by the converted dictionary's type.
 */
visitDictionary<T extends arrow.Dictionary>(data: arrow.Data<T>) {
  // Visit the same chunk re-typed as the dictionary's index type, then cast the result to Uint32.
  const indexData = data.clone(data.type.indices);
  const codes     = this.visit(indexData).cast(new Uint32);
  // The dictionary is a whole Vector, so it goes through `fromArrow` rather than `visit`.
  // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
  const categories = fromArrow(data.dictionary!);
  return new Column({
    length: data.length,
    type: new Categorical(categories.type),
    nullMask: data.nullBitmap,
    children: [codes, categories],
  });
}
188- // visitIntervalDayTime<T extends arrow.IntervalDayTime>(vector : arrow.Vector <T>) {}
189- // visitIntervalYearMonth<T extends arrow.IntervalYearMonth>(vector : arrow.Vector <T>) {}
190- // visitFixedSizeList<T extends arrow.FixedSizeList>(vector : arrow.Vector <T>) {}
191- // visitMap<T extends arrow.Map_>(vector : arrow.Vector <T>) {}
188+ // visitIntervalDayTime<T extends arrow.IntervalDayTime>(data : arrow.Data <T>) {}
189+ // visitIntervalYearMonth<T extends arrow.IntervalYearMonth>(data : arrow.Data <T>) {}
190+ // visitFixedSizeList<T extends arrow.FixedSizeList>(data : arrow.Data <T>) {}
191+ // visitMap<T extends arrow.Map_>(data : arrow.Data <T>) {}
192192}
193193
// Single shared visitor instance used by `fromArrow`.
const visitor = new DataToColumnVisitor();

/**
 * Converts an Arrow Vector into a single Column.
 *
 * Every Data chunk in `vector.data` is converted on its own. A single chunk is returned directly;
 * multiple chunks are each wrapped in a one-column Table, the Tables are concatenated, and
 * column 0 of the concatenated Table is returned.
 *
 * @param vector Arrow Vector to convert.
 * @returns The Column equivalent of `vector`.
 */
export function fromArrow<T extends arrow.DataType>(vector: arrow.Vector<T>):
  Column<ArrowToCUDFType<T>> {
  const chunkColumns = visitor.visitMany(vector.data);
  if (chunkColumns.length !== 1) {
    const singleColumnTables = chunkColumns.map((col) => new Table({columns: [col]}));
    return Table.concat(singleColumnTables).getColumnByIndex(0);
  }
  return chunkColumns[0];
}
0 commit comments