@@ -16,7 +16,7 @@ import {
1616import { TGetResultSetMetadataResp , TColumnDesc } from '../../thrift/TCLIService_types' ;
1717import IClientContext from '../contracts/IClientContext' ;
1818import IResultsProvider , { ResultsProviderFetchNextOptions } from './IResultsProvider' ;
19- import { getSchemaColumns , convertThriftValue } from './utils' ;
19+ import { ArrowBatch , getSchemaColumns , convertThriftValue } from './utils' ;
2020
2121const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils ;
2222
@@ -26,15 +26,23 @@ type ArrowSchemaField = Field<DataType<Type, TypeMap>>;
2626export default class ArrowResultConverter implements IResultsProvider < Array < any > > {
2727 protected readonly context : IClientContext ;
2828
29- private readonly source : IResultsProvider < Array < Buffer > > ;
29+ private readonly source : IResultsProvider < ArrowBatch > ;
3030
3131 private readonly schema : Array < TColumnDesc > ;
3232
33- private reader ?: IterableIterator < RecordBatch < TypeMap > > ;
33+ private recordBatchReader ?: IterableIterator < RecordBatch < TypeMap > > ;
3434
35- private pendingRecordBatch ?: RecordBatch < TypeMap > ;
35+ // Remaining rows in current Arrow batch (not the record batch!)
36+ private remainingRows : number = 0 ;
3637
37- constructor ( context : IClientContext , source : IResultsProvider < Array < Buffer > > , { schema } : TGetResultSetMetadataResp ) {
38+ // This is the next (!!) record batch to be read. It is unset only in two cases:
39+ // - prior to the first call to `fetchNext`
40+ // - when no more data available
41+ // This field is primarily used by `hasMore`, so it can tell whether the next `fetchNext` call will
42+ // actually return a non-empty result
43+ private prefetchedRecordBatch ?: RecordBatch < TypeMap > ;
44+
45+ constructor ( context : IClientContext , source : IResultsProvider < ArrowBatch > , { schema } : TGetResultSetMetadataResp ) {
3846 this . context = context ;
3947 this . source = source ;
4048 this . schema = getSchemaColumns ( schema ) ;
@@ -44,7 +52,7 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
4452 if ( this . schema . length === 0 ) {
4553 return false ;
4654 }
47- if ( this . pendingRecordBatch ) {
55+ if ( this . prefetchedRecordBatch ) {
4856 return true ;
4957 }
5058 return this . source . hasMore ( ) ;
@@ -55,47 +63,80 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
5563 return [ ] ;
5664 }
5765
58- // eslint-disable-next-line no-constant-condition
59- while ( true ) {
60- // It's not possible to know if iterator has more items until trying
61- // to get the next item. But we need to know if iterator is empty right
62- // after getting the next item. Therefore, after creating the iterator,
63- // we get one item more and store it in `pendingRecordBatch`. Next time,
64- // we use that stored item, and prefetch the next one. Prefetched item
65- // is therefore the next item we are going to return, so it can be used
66- // to know if we actually can return anything next time
67- const recordBatch = this . pendingRecordBatch ;
68- this . pendingRecordBatch = this . prefetch ( ) ;
69-
70- if ( recordBatch ) {
71- const table = new Table ( recordBatch ) ;
72- return this . getRows ( table . schema , table . toArray ( ) ) ;
66+ // It's not possible to know if iterator has more items until trying to get the next item.
67+ // So each time we read one batch ahead and store it, but process the batch prefetched on
68+ // a previous `fetchNext` call. Because we actually already have the next item - it's easy
69+ // to tell if the subsequent `fetchNext` will be able to read anything, and `hasMore` logic
70+ // becomes trivial
71+
72+ // This prefetch handles a first call to `fetchNext`, when all the internal fields are not initialized yet.
73+ // On subsequent calls to `fetchNext` it will do nothing
74+ await this . prefetch ( options ) ;
75+
76+ if ( this . prefetchedRecordBatch ) {
77+ // Consume a record batch fetched during previous call to `fetchNext`
78+ const table = new Table ( this . prefetchedRecordBatch ) ;
79+ this . prefetchedRecordBatch = undefined ;
80+ // Get table rows, but not more than remaining count
81+ const arrowRows = table . toArray ( ) . slice ( 0 , this . remainingRows ) ;
82+ const result = this . getRows ( table . schema , arrowRows ) ;
83+
84+ // Reduce remaining rows count by a count of rows we just processed.
85+ // If the remaining count reached zero - we're done with current arrow
86+ // batch, so discard the batch reader
87+ this . remainingRows -= result . length ;
88+ if ( this . remainingRows === 0 ) {
89+ this . recordBatchReader = undefined ;
7390 }
7491
75- // eslint-disable-next-line no-await-in-loop
76- const batches = await this . source . fetchNext ( options ) ;
77- if ( batches . length === 0 ) {
78- this . reader = undefined ;
79- break ;
80- }
92+ // Prefetch the next record batch
93+ await this . prefetch ( options ) ;
8194
82- const reader = RecordBatchReader . from < TypeMap > ( batches ) ;
83- this . reader = reader [ Symbol . iterator ] ( ) ;
84- this . pendingRecordBatch = this . prefetch ( ) ;
95+ return result ;
8596 }
8697
8798 return [ ] ;
8899 }
89100
// Makes sure `prefetchedRecordBatch` holds the next non-empty record batch, when one exists.
// Returns immediately if a batch is already prefetched. Otherwise it keeps pulling: when there
// is no active record batch reader, it asks the source for the next Arrow batch and - only if
// that batch carries data and a positive row count - opens a reader over it and resets
// `remainingRows`. It then takes the next record batch from the reader, skipping empty ones.
// The method returns without a prefetched batch only when the source reports no more data.
private async prefetch(options: ResultsProviderFetchNextOptions) {
  // eslint-disable-next-line no-constant-condition
  while (true) {
    // A batch is already waiting to be consumed - nothing to do
    if (this.prefetchedRecordBatch) {
      return;
    }

    // No active reader: try to open one over the next item from the source
    if (!this.recordBatchReader) {
      // eslint-disable-next-line no-await-in-loop
      if (!(await this.source.hasMore())) {
        // End of data; `prefetchedRecordBatch` stays unset
        return;
      }

      // eslint-disable-next-line no-await-in-loop
      const fetched = await this.source.fetchNext(options);
      if (fetched.batches.length > 0 && fetched.rowCount > 0) {
        this.recordBatchReader = RecordBatchReader.from<TypeMap>(fetched.batches)[Symbol.iterator]();
        this.remainingRows = fetched.rowCount;
      }
    }

    // The reader may still be missing here (the source returned an empty item). That case is
    // handled exactly like an exhausted reader: drop it and let the loop ask the source again
    const next = this.recordBatchReader?.next();
    if (!next || next.done || next.value === undefined) {
      this.recordBatchReader = undefined;
    } else if (next.value.numRows > 0) {
      // Only non-empty record batches are kept; empty ones are skipped by looping again
      this.prefetchedRecordBatch = next.value;
    }
  }
}
100141
101142 private getRows ( schema : ArrowSchema , rows : Array < StructRow | MapRow > ) : Array < any > {
0 commit comments