Skip to content

Commit f3c53a5

Browse files
[PECO-1260] Support results compression (#216)
* [PECO-1260] Support results compression

Signed-off-by: Levko Kravets <levko.ne@gmail.com>

* Fix existing tests

Signed-off-by: Levko Kravets <levko.ne@gmail.com>

* Add tests

Signed-off-by: Levko Kravets <levko.ne@gmail.com>

* Rename option to align with other connectors

Signed-off-by: Levko Kravets <levko.ne@gmail.com>

---------

Signed-off-by: Levko Kravets <levko.ne@gmail.com>
1 parent 5b01d59 commit f3c53a5

File tree

12 files changed

+327
-13
lines changed

12 files changed

+327
-13
lines changed

lib/DBSQLClient.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
8484

8585
useCloudFetch: false,
8686
cloudFetchConcurrentDownloads: 10,
87+
88+
useLZ4Compression: true,
8789
};
8890
}
8991

lib/DBSQLOperation/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -377,14 +377,14 @@ export default class DBSQLOperation implements IOperation {
377377
case TSparkRowSetType.ARROW_BASED_SET:
378378
resultSource = new ArrowResultConverter(
379379
this.context,
380-
new ArrowResultHandler(this.context, this._data, metadata.arrowSchema),
380+
new ArrowResultHandler(this.context, this._data, metadata.arrowSchema, metadata.lz4Compressed),
381381
metadata.schema,
382382
);
383383
break;
384384
case TSparkRowSetType.URL_BASED_SET:
385385
resultSource = new ArrowResultConverter(
386386
this.context,
387-
new CloudFetchResultHandler(this.context, this._data),
387+
new CloudFetchResultHandler(this.context, this._data, metadata.lz4Compressed),
388388
metadata.schema,
389389
);
390390
break;

lib/DBSQLSession.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ export default class DBSQLSession implements IDBSQLSession {
184184
...getArrowOptions(clientConfig),
185185
canDownloadResult: options.useCloudFetch ?? clientConfig.useCloudFetch,
186186
parameters: getQueryParameters(this.sessionHandle, options.namedParameters, options.ordinalParameters),
187+
canDecompressLZ4Result: clientConfig.useLZ4Compression,
187188
});
188189
const response = await this.handleResponse(operationPromise);
189190
const operation = this.createOperation(response);

lib/contracts/IClientContext.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ export interface ClientConfig {
1515

1616
useCloudFetch: boolean;
1717
cloudFetchConcurrentDownloads: number;
18+
19+
useLZ4Compression: boolean;
1820
}
1921

2022
export default interface IClientContext {

lib/result/ArrowResultHandler.ts

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Buffer } from 'buffer';
1+
import LZ4 from 'lz4';
22
import { TRowSet } from '../../thrift/TCLIService_types';
33
import IClientContext from '../contracts/IClientContext';
44
import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
@@ -10,10 +10,18 @@ export default class ArrowResultHandler implements IResultsProvider<Array<Buffer
1010

1111
private readonly arrowSchema?: Buffer;
1212

13-
constructor(context: IClientContext, source: IResultsProvider<TRowSet | undefined>, arrowSchema?: Buffer) {
13+
private readonly isLZ4Compressed: boolean;
14+
15+
constructor(
16+
context: IClientContext,
17+
source: IResultsProvider<TRowSet | undefined>,
18+
arrowSchema?: Buffer,
19+
isLZ4Compressed?: boolean,
20+
) {
1421
this.context = context;
1522
this.source = source;
1623
this.arrowSchema = arrowSchema;
24+
this.isLZ4Compressed = isLZ4Compressed ?? false;
1725
}
1826

1927
public async hasMore() {
@@ -31,9 +39,9 @@ export default class ArrowResultHandler implements IResultsProvider<Array<Buffer
3139
const rowSet = await this.source.fetchNext(options);
3240

3341
const batches: Array<Buffer> = [];
34-
rowSet?.arrowBatches?.forEach((arrowBatch) => {
35-
if (arrowBatch.batch) {
36-
batches.push(arrowBatch.batch);
42+
rowSet?.arrowBatches?.forEach(({ batch }) => {
43+
if (batch) {
44+
batches.push(this.isLZ4Compressed ? LZ4.decode(batch) : batch);
3745
}
3846
});
3947

lib/result/CloudFetchResultHandler.ts

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Buffer } from 'buffer';
1+
import LZ4 from 'lz4';
22
import fetch, { RequestInfo, RequestInit } from 'node-fetch';
33
import { TRowSet, TSparkArrowResultLink } from '../../thrift/TCLIService_types';
44
import IClientContext from '../contracts/IClientContext';
@@ -9,13 +9,16 @@ export default class CloudFetchResultHandler implements IResultsProvider<Array<B
99

1010
private readonly source: IResultsProvider<TRowSet | undefined>;
1111

12+
private readonly isLZ4Compressed: boolean;
13+
1214
private pendingLinks: Array<TSparkArrowResultLink> = [];
1315

1416
private downloadTasks: Array<Promise<Buffer>> = [];
1517

16-
constructor(context: IClientContext, source: IResultsProvider<TRowSet | undefined>) {
18+
constructor(context: IClientContext, source: IResultsProvider<TRowSet | undefined>, isLZ4Compressed?: boolean) {
1719
this.context = context;
1820
this.source = source;
21+
this.isLZ4Compressed = isLZ4Compressed ?? false;
1922
}
2023

2124
public async hasMore() {
@@ -42,7 +45,12 @@ export default class CloudFetchResultHandler implements IResultsProvider<Array<B
4245
}
4346

4447
const batch = await this.downloadTasks.shift();
45-
return batch ? [batch] : [];
48+
const batches = batch ? [batch] : [];
49+
50+
if (this.isLZ4Compressed) {
51+
return batches.map((buffer) => LZ4.decode(buffer));
52+
}
53+
return batches;
4654
}
4755

4856
private async downloadLink(link: TSparkArrowResultLink): Promise<Buffer> {

0 commit comments

Comments
 (0)