-
Notifications
You must be signed in to change notification settings - Fork 108
Expand file tree
/
Copy pathSparkSqlParser.g4
More file actions
1975 lines (1784 loc) · 55.4 KB
/
SparkSqlParser.g4
File metadata and controls
1975 lines (1784 loc) · 55.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/**
* This file is an adaptation of Apache Spark's spark/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 grammar.
* Reference: https://github.com/apache/spark/blob/v3.5.0/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
*/
// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging
// $antlr-format spaceBeforeAssignmentOperators false, keepEmptyLinesAtTheStartOfBlocks true
// Parser-only grammar. Token types are taken from the separate SparkSqlLexer vocabulary,
// and the generated parser class extends SQLParserBase instead of ANTLR's default base class.
parser grammar SparkSqlParser;
options {
tokenVocab=SparkSqlLexer;
caseInsensitive= true;
superClass=SQLParserBase;
}
// The @header action is copied into the generated parser source; here it imports the
// superclass named in the options block above.
@header {
import { SQLParserBase } from '../SQLParserBase';
}
// Entry rule: zero or more statements followed by end of input.
program
: singleStatement* EOF
;
// One statement with an optional trailing SEMICOLON token.
singleStatement
: statement SEMICOLON?
;
// A single SQL statement. Every alternative carries a `# label`, so ANTLR generates a
// dedicated context class (and listener/visitor callbacks) for each statement kind.
//
// Alternative order is significant: when several alternatives match the same tokens,
// ANTLR resolves the ambiguity in favour of the one listed first. Alternatives that
// end in the non-greedy wildcard `.*?` accept arbitrary tokens, so every specific form
// must be listed before the wildcard alternative that shares its leading keyword.
statement
: query # statementDefault
| ctes? dmlStatementNoWith # dmlStatement
| KW_USE namespace? namespaceName # useNamespace
| KW_SET KW_CATALOG (identifier | stringLit) # setCatalog
| KW_CREATE namespace (ifNotExists)? namespaceNameCreate (
(KW_COMMENT comment=stringLit)
| (KW_LOCATION stringLit)
| (KW_WITH (KW_DBPROPERTIES | KW_PROPERTIES) propertyList)
)* # createNamespace
| KW_ALTER namespace namespaceName KW_SET (KW_DBPROPERTIES | KW_PROPERTIES) propertyList # setNamespaceProperties
| KW_ALTER namespace namespaceName KW_SET KW_LOCATION stringLit # setNamespaceLocation
| KW_DROP namespace (ifExists)? namespaceName (KW_RESTRICT | KW_CASCADE)? # dropNamespace
| KW_SHOW (KW_NAMESPACES | KW_DATABASES | KW_SCHEMAS) ((KW_FROM | KW_IN) multipartIdentifier)? (
KW_LIKE? pattern=stringLit
)? # showNamespaces
| KW_CREATE KW_TEMPORARY? KW_EXTERNAL? KW_TABLE (ifNotExists)? tableNameCreate (
LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN
)? tableProvider? createTableClauses (KW_AS? query)? # createTable
| KW_CREATE KW_TABLE (ifNotExists)? target=tableNameCreate KW_LIKE source=tableName (
tableProvider
| rowFormat
| createFileFormat
| (KW_LOCATION stringLit)
| (KW_TBLPROPERTIES tableProps=propertyList)
| tableLifecycle
)* # createTableLike
| (KW_CREATE KW_OR)? KW_REPLACE KW_TABLE tableNameCreate (
LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN
)? tableProvider? createTableClauses (KW_AS? query)? # replaceTable
| KW_ANALYZE KW_TABLE tableName partitionSpec? KW_COMPUTE KW_STATISTICS (
KW_NOSCAN
| KW_FOR KW_COLUMNS columnNameSeq
| KW_FOR KW_ALL KW_COLUMNS
)? # analyze
| KW_ANALYZE KW_TABLES ((KW_FROM | KW_IN) namespaceName)? KW_COMPUTE KW_STATISTICS (KW_NOSCAN)? # analyzeTables
| KW_ALTER KW_TABLE tableName KW_ADD KW_COLUMN qualifiedColTypeWithPositionForAdd # alterTableAddColumn
| KW_ALTER KW_TABLE tableName KW_ADD KW_COLUMNS LEFT_PAREN qualifiedColTypeWithPositionSeqForAdd RIGHT_PAREN # alterTableAddColumns
| KW_ALTER KW_TABLE table=tableName KW_RENAME KW_COLUMN columnName KW_TO columnNameCreate # renameTableColumn
| KW_ALTER KW_TABLE tableName KW_DROP KW_COLUMN (ifExists)? columnName # alterTableDropColumn
| KW_ALTER KW_TABLE tableName KW_DROP KW_COLUMNS (ifExists)? LEFT_PAREN columnNameSeq RIGHT_PAREN # dropTableColumns
| KW_ALTER (KW_TABLE tableName | KW_VIEW viewName) KW_RENAME KW_TO multipartIdentifier # renameTable
| KW_ALTER (KW_TABLE tableName | KW_VIEW viewName) KW_SET KW_TBLPROPERTIES propertyList # setTableProperties
| KW_ALTER (KW_TABLE tableName | KW_VIEW viewName) KW_UNSET KW_TBLPROPERTIES (ifExists)? propertyList # unsetTableProperties
| KW_ALTER KW_TABLE table=tableName (KW_ALTER | KW_CHANGE) KW_COLUMN? column=columnName alterColumnAction? # alterTableAlterColumn
| KW_ALTER KW_TABLE table=tableName partitionSpec? KW_CHANGE KW_COLUMN? colName=columnName columnType colPosition? # hiveChangeColumn
| KW_ALTER KW_TABLE table=tableName partitionSpec? KW_REPLACE KW_COLUMNS LEFT_PAREN qualifiedColTypeWithPositionSeqForReplace RIGHT_PAREN #
hiveReplaceColumns
| KW_ALTER KW_TABLE tableName (partitionSpec)? KW_SET KW_SERDE stringLit (
KW_WITH KW_SERDEPROPERTIES propertyList
)? # setTableSerDe
| KW_ALTER KW_TABLE tableName (partitionSpec)? KW_SET KW_SERDEPROPERTIES propertyList # setTableSerDeProperties
| KW_ALTER (KW_TABLE tableName | KW_VIEW viewName) KW_ADD (ifNotExists)? partitionSpecLocation+ # addTablePartition
| KW_ALTER KW_TABLE tableName partitionSpec KW_RENAME KW_TO partitionSpec # renameTablePartition
| KW_ALTER (KW_TABLE tableName | KW_VIEW viewName) KW_DROP (ifExists)? partitionSpec (
COMMA partitionSpec
)* KW_PURGE? # dropTablePartitions
| KW_ALTER KW_TABLE tableName (partitionSpec)? KW_SET KW_LOCATION stringLit # setTableLocation
| KW_ALTER KW_TABLE tableName KW_RECOVER KW_PARTITIONS # recoverPartitions
| KW_ALTER KW_MATERIALIZED KW_VIEW viewName (KW_ENABLE | KW_DISABLE) KW_REWRITE # alterMaterializedViewRewrite
| KW_ALTER KW_MATERIALIZED KW_VIEW viewName KW_SET KW_TBLPROPERTIES propertyList # alterMaterializedViewProperties
| KW_DROP KW_TABLE (ifExists)? tableName KW_PURGE? # dropTable
| KW_DROP KW_VIEW (ifExists)? viewName # dropView
| KW_DROP KW_MATERIALIZED KW_VIEW (ifExists)? viewName # dropMaterializedView
| KW_CREATE (KW_OR KW_REPLACE)? (KW_GLOBAL? KW_TEMPORARY)? KW_VIEW (ifNotExists)? viewNameCreate identifierCommentList? (
(KW_COMMENT comment=stringLit)
| (KW_PARTITIONED KW_ON identifierList)
| (KW_TBLPROPERTIES propertyList)
)* KW_AS query # createView
| KW_CREATE (KW_OR KW_REPLACE)? KW_GLOBAL? KW_TEMPORARY KW_VIEW viewNameCreate (
LEFT_PAREN colTypeList RIGHT_PAREN
)? tableProvider (KW_OPTIONS propertyList)? # createTempViewUsing
| KW_ALTER KW_VIEW viewName KW_AS? query # alterViewQuery
| KW_CREATE (KW_OR KW_REPLACE)? KW_TEMPORARY? KW_FUNCTION (ifNotExists)? functionNameCreate KW_AS className=stringLit (
KW_USING (identifier stringLit) (COMMA (identifier stringLit))*
)? # createFunction
|
// Materialized-view syntax developed in-house by dtstack; Spark itself does not support it yet.
KW_CREATE KW_MATERIALIZED KW_VIEW (ifNotExists)? viewNameCreate tableProvider? (
(KW_OPTIONS options=propertyList)
| (KW_PARTITIONED KW_BY partitioning=partitionFieldList)
| skewSpec
| bucketSpec
| rowFormat
| createFileFormat
| (KW_LOCATION stringLit)
| (KW_COMMENT comment=stringLit)
| (KW_TBLPROPERTIES tableProps=propertyList)
)* KW_AS query # createMaterializedView
| KW_DROP KW_TEMPORARY? KW_FUNCTION (ifExists)? functionName # dropFunction
| KW_DECLARE (KW_OR KW_REPLACE)? KW_VARIABLE? functionName dataType? variableDefaultExpression? # declareVariable
| KW_DROP KW_TEMPORARY KW_VARIABLE (ifExists)? (tableName | viewName | functionName) # dropVariable
| KW_EXPLAIN (KW_LOGICAL | KW_FORMATTED | KW_EXTENDED | KW_CODEGEN | KW_COST)? statement # explainStatement
| KW_SHOW KW_TABLES ((KW_FROM | KW_IN) namespaceName)? (KW_LIKE? pattern=stringLit)? # showTables
| KW_SHOW KW_TABLE KW_EXTENDED ((KW_FROM | KW_IN) ns=namespaceName)? KW_LIKE pattern=stringLit partitionSpec? # showTableExtended
| KW_SHOW KW_TBLPROPERTIES table=tableName (LEFT_PAREN key=propertyKey RIGHT_PAREN)? # showTblProperties
| KW_SHOW KW_COLUMNS (KW_FROM | KW_IN) table=tableName ((KW_FROM | KW_IN) namespaceName)? # showColumns
| KW_SHOW KW_VIEWS ((KW_FROM | KW_IN) namespaceName)? (KW_LIKE? pattern=stringLit)? # showViews
| KW_SHOW KW_PARTITIONS tableName partitionSpec? # showPartitions
| KW_SHOW (KW_USER | KW_SYSTEM | KW_ALL)? KW_FUNCTIONS ((KW_FROM | KW_IN) ns=namespaceName)? (
KW_LIKE? (legacy=multipartIdentifier | pattern=stringLit)
)? # showFunctions
| KW_SHOW KW_CREATE KW_TABLE tableName (KW_AS KW_SERDE)? # showCreateTable
| KW_SHOW KW_CURRENT namespace # showCurrentNamespace
| KW_SHOW KW_CATALOGS (KW_LIKE? pattern=stringLit)? # showCatalogs
| KW_SHOW KW_MATERIALIZED KW_VIEWS ((KW_FROM | KW_IN) db_name=namespaceName)? (
KW_LIKE? pattern=stringLit
)? # showMaterializedViews
| KW_SHOW KW_CREATE KW_MATERIALIZED KW_VIEW viewName (KW_AS KW_SERDE)? # showCreateMaterializedView
| (KW_DESC | KW_DESCRIBE) KW_FUNCTION KW_EXTENDED? describeFuncName # describeFunction
| (KW_DESC | KW_DESCRIBE) KW_DATABASE KW_EXTENDED? namespaceName # describeNamespace
| (KW_DESC | KW_DESCRIBE) KW_TABLE? option=(KW_EXTENDED | KW_FORMATTED)? tableName partitionSpec? describeColName? # describeRelation
| (KW_DESC | KW_DESCRIBE) KW_QUERY? query # describeQuery
| KW_COMMENT KW_ON ((namespace namespaceName) | (KW_TABLE tableName)) KW_IS (
stringLit
| KW_NULL
) # commentTable
| KW_REFRESH KW_TABLE tableName # refreshTable
| KW_REFRESH KW_FUNCTION functionName # refreshFunction
// Must stay ahead of refreshResource: that alternative's `.*?` also matches these tokens,
// and ANTLR would pick it if it were listed first.
| KW_REFRESH KW_MATERIALIZED KW_VIEW viewName # refreshMaterializedView
| KW_REFRESH (stringLit | .*?) # refreshResource
| KW_CACHE KW_LAZY? KW_TABLE tableName (KW_OPTIONS options=propertyList)? (KW_AS? query)? # cacheTable
| KW_UNCACHE KW_TABLE (ifExists)? tableName # unCacheTable
| KW_CLEAR KW_CACHE # clearCache
| KW_LOAD KW_DATA KW_LOCAL? KW_INPATH path=stringLit KW_OVERWRITE? KW_INTO KW_TABLE tableName partitionSpec? # loadData
| KW_TRUNCATE KW_TABLE tableName partitionSpec? # truncateTable
| (KW_MSCK)? KW_REPAIR KW_TABLE tableName (option=(KW_ADD | KW_DROP | KW_SYNC) KW_PARTITIONS)? # repairTable
| op=(KW_ADD | KW_LIST) identifier .*? # manageResource
// SET forms run from most specific to the catch-all `KW_SET .*?`.
| KW_SET KW_ROLE .*? # failNativeCommand
| KW_SET KW_TIME KW_ZONE interval # setTimeZoneInterval
| KW_SET KW_TIME KW_ZONE (stringLit | KW_LOCAL) # setTimeZone
| KW_SET KW_TIME KW_ZONE .*? # setTimeZoneAny
| KW_SET (KW_VARIABLE | KW_VAR) assignmentList # setVariableAssignment
| KW_SET (KW_VARIABLE | KW_VAR) LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ LEFT_PAREN query RIGHT_PAREN # setVariableMultiAssignment
| KW_SET quotedIdentifier EQ BACKQUOTED_IDENTIFIER # setConfig
| KW_SET quotedIdentifier (EQ .*?)? # setConfigAndValue
| KW_SET .*? EQ BACKQUOTED_IDENTIFIER # setConfigAnyKey
| KW_SET .*? # setAny
| KW_RESET quotedIdentifier # resetConfig
| KW_RESET .*? # resetAny
| KW_CREATE KW_INDEX (ifNotExists)? identifier KW_ON KW_TABLE? tableName (
KW_USING indexType=identifier
)? LEFT_PAREN multipartIdentifierPropertyList RIGHT_PAREN (KW_OPTIONS options=propertyList)? # createIndex
| KW_DROP KW_INDEX (ifExists)? identifier KW_ON KW_TABLE? tableName # dropIndex
| KW_OPTIMIZE tableName whereClause? zOrderClause # optimizeTable
// Last resort: a recognised Hive command prefix followed by arbitrary tokens.
| unsupportedHiveNativeCommands .*? # unsupportHiveCommands
;
// Leading keyword sequences of Hive-native commands. The `statement` rule uses this rule
// followed by `.*?`, so only the command prefix is described here. The kw1..kw6 labels
// expose the matched keyword tokens on the generated context.
unsupportedHiveNativeCommands
: kw1=(KW_CREATE | KW_DROP) kw2=KW_ROLE
| kw1=(KW_GRANT | KW_REVOKE) kw2=KW_ROLE?
| kw1=KW_SHOW kw2=(
KW_GRANT
| KW_PRINCIPALS
| KW_COMPACTIONS
| KW_TRANSACTIONS
| KW_INDEXES
| KW_LOCKS
)
| kw1=KW_SHOW kw2=KW_ROLE kw3=KW_GRANT?
| kw1=KW_SHOW KW_CURRENT? KW_ROLES
| kw1=KW_SHOW kw2=KW_CREATE kw3=KW_TABLE
| kw1=(KW_CREATE | KW_DROP | KW_ALTER) kw2=KW_INDEX
| kw1=(KW_EXPORT | KW_IMPORT | KW_LOCK | KW_UNLOCK) kw2=KW_TABLE
| kw1=(KW_LOCK | KW_UNLOCK) kw2=KW_DATABASE
| kw1=(KW_CREATE | KW_DROP) kw2=KW_TEMPORARY kw3=KW_MACRO
| kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_NOT kw4=(KW_CLUSTERED | KW_SORTED | KW_SKEWED)
// Covers both CLUSTERED BY and SKEWED BY; a separate SKEWED BY alternative would be unreachable.
| kw1=KW_ALTER kw2=KW_TABLE tableName kw3=(KW_CLUSTERED | KW_SKEWED) kw4=KW_BY
| kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_NOT kw4=KW_STORED kw5=KW_AS kw6=KW_DIRECTORIES
| kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_SET kw4=KW_SKEWED kw5=KW_LOCATION
| kw1=KW_ALTER kw2=KW_TABLE tableName kw3=(KW_EXCHANGE | KW_ARCHIVE | KW_UNARCHIVE) kw4=KW_PARTITION
| kw1=KW_ALTER kw2=KW_TABLE tableName kw3=KW_TOUCH
| kw1=KW_ALTER kw2=KW_TABLE tableName partitionSpec? (
KW_COMPACT
| KW_CONCATENATE
| (KW_SET KW_FILEFORMAT)
| (KW_REPLACE KW_COLUMNS)
)
| kw1=KW_START kw2=KW_TRANSACTION
| kw1=KW_COMMIT
| kw1=KW_ROLLBACK
| kw1=KW_DFS
;
// Bucketing clause: clustering columns, optional sort columns, and an integer bucket count.
bucketSpec
: KW_CLUSTERED KW_BY identifierList (KW_SORTED KW_BY orderedIdentifierList)? KW_INTO INTEGER_VALUE KW_BUCKETS
;
// Skew clause: skewed columns, then either a flat or a nested list of constants,
// optionally followed by the STORED AS DIRECTORIES keyword sequence.
skewSpec
: KW_SKEWED KW_BY identifierList KW_ON (constantList | nestedConstantList) (
KW_STORED KW_AS KW_DIRECTORIES
)?
;
// Location clause: the LOCATION keyword followed by a string literal.
locationSpec
: KW_LOCATION stringLit
;
// Comment clause; the string literal is exposed through the `comment` label.
commentSpec
: KW_COMMENT comment=stringLit
;
// A complete query: optional WITH clause (ctes), the set-operation term, then the
// organization clauses (queryOrganization, every part of which is optional).
query
: ctes? queryTerm queryOrganization # queryStatement
;
// Head of an INSERT statement (the source query is supplied by the calling rule).
// Alternatives, in order:
//   1. INSERT OVERWRITE into a table, with optional partition spec and column list / BY NAME
//   2. INSERT INTO a table, with optional partition spec and column list / BY NAME
//   3. INSERT INTO a table REPLACE <where clause>
//   4. INSERT OVERWRITE into a directory with optional row format / file format
//   5. INSERT OVERWRITE into a directory using a table provider, with optional options
insertInto
: KW_INSERT KW_OVERWRITE KW_TABLE? tableName (partitionSpec (ifNotExists)?)? (
(KW_BY KW_NAME)
| (LEFT_PAREN columnNameSeq RIGHT_PAREN)
)?
| KW_INSERT KW_INTO KW_TABLE? tableName partitionSpec? (ifNotExists)? (
(KW_BY KW_NAME)
| (LEFT_PAREN columnNameSeq RIGHT_PAREN)
)?
| KW_INSERT KW_INTO KW_TABLE? tableName KW_REPLACE whereClause
| KW_INSERT KW_OVERWRITE KW_LOCAL? KW_DIRECTORY path=stringLit rowFormat? createFileFormat?
| KW_INSERT KW_OVERWRITE KW_LOCAL? KW_DIRECTORY (path=stringLit)? tableProvider (
KW_OPTIONS options=propertyList
)?
;
// A partition spec with an optional location string.
partitionSpecLocation
: partitionSpec (KW_LOCATION stringLit)?
;
// Parenthesised, comma-separated list of partition values introduced by KW_PARTITION.
partitionSpec
: KW_PARTITION LEFT_PAREN partitionVal (COMMA partitionVal)* RIGHT_PAREN
;
// One partition entry: a bare identifier, `identifier = constant`, or `identifier = DEFAULT`.
partitionVal
: identifier (EQ constant)?
| identifier EQ KW_DEFAULT
;
// The three keyword tokens accepted interchangeably wherever a namespace keyword is expected.
namespace
: KW_NAMESPACE
| KW_DATABASE
| KW_SCHEMA
;
// Target of a describe-function statement: an identifier reference, a string literal,
// or one of the operator rules (comparison, arithmetic, predicate).
describeFuncName
: identifierReference
| stringLit
| comparisonOperator
| arithmeticOperator
| predicateOperator
;
// Dot-separated column path; every segment is collected into the `nameParts` list.
describeColName
: nameParts+=identifier (DOT nameParts+=identifier)*
;
// WITH clause: one or more comma-separated named queries.
ctes
: KW_WITH namedQuery (COMMA namedQuery)*
;
// A named query: name, optional column alias list, optional AS, and a parenthesised query.
namedQuery
: name=errorCapturingIdentifier (columnAliases=identifierList)? KW_AS? LEFT_PAREN query RIGHT_PAREN
;
// Provider clause: KW_USING followed by a (possibly multi-part) identifier.
tableProvider
: KW_USING multipartIdentifier
;
// Optional clauses of a create/replace table statement. The `( ... )*` loop accepts the
// clauses in any order and any number of times; this rule itself does not reject
// repeated clauses.
createTableClauses
: (
(KW_OPTIONS options=expressionPropertyList)
| (KW_PARTITIONED KW_BY partitioning=partitionFieldList)
| skewSpec
| bucketSpec
| rowFormat
| createFileFormat
| KW_LOCATION stringLit
| KW_COMMENT comment=stringLit
| (KW_TBLPROPERTIES tableProps=propertyList)
| tableLifecycle
)*
;
// dtstack extension for SparkSQL/HiveSQL: table lifecycle clause, i.e. KW_LIFECYCLE
// followed by an integer literal.
tableLifecycle
: KW_LIFECYCLE INTEGER_VALUE
;
// Parenthesised, comma-separated list of properties.
propertyList
: LEFT_PAREN property (COMMA property)* RIGHT_PAREN
;
// One property: a key, optionally followed by a value; the EQ sign between them is optional.
property
: key=propertyKey (EQ? value=propertyValue)?
;
// Property key: a dot-separated identifier path or a string literal.
propertyKey
: identifier (DOT identifier)*
| stringLit
;
// Property value: integer, decimal, boolean, or string literal.
propertyValue
: INTEGER_VALUE
| DECIMAL_VALUE
| booleanValue
| stringLit
;
// Same shape as propertyList, but each value may be an arbitrary expression.
expressionPropertyList
: LEFT_PAREN expressionProperty (COMMA expressionProperty)* RIGHT_PAREN
;
// One property whose optional value is an expression; the EQ sign is optional.
expressionProperty
: key=propertyKey (EQ? value=expression)?
;
// Parenthesised, comma-separated list of constants (at least one).
constantList
: LEFT_PAREN constant (COMMA constant)* RIGHT_PAREN
;
// Parenthesised, comma-separated list of constant lists (at least one).
nestedConstantList
: LEFT_PAREN constantList (COMMA constantList)* RIGHT_PAREN
;
// Storage clause: STORED AS <file format> or STORED BY <storage handler>.
createFileFormat
: KW_STORED KW_AS fileFormat
| KW_STORED KW_BY storageHandler
;
// File format: an explicit input/output format pair given as string literals, or a single identifier.
fileFormat
: KW_INPUTFORMAT inFmt=stringLit KW_OUTPUTFORMAT outFmt=stringLit
| identifier
;
// Storage handler named by a string literal, with optional serde properties.
storageHandler
: stringLit (KW_WITH KW_SERDEPROPERTIES propertyList)?
;
// DML statements without their leading WITH clause (the caller in `statement` adds `ctes?`).
// Labelled alternatives: insert from a query, multi-insert driven by a FROM clause,
// delete, update, and merge.
dmlStatementNoWith
: insertInto query # insertFromQuery
| fromClause (insertInto fromStatementBody)+ # multipleInsert
| KW_DELETE KW_FROM tableName tableAlias whereClause? # deleteFromTable
| KW_UPDATE tableName tableAlias setClause whereClause? # updateTable
| KW_MERGE KW_INTO target=tableName targetAlias=tableAlias KW_USING (
source=identifierReference
| LEFT_PAREN sourceQuery=query RIGHT_PAREN
) sourceAlias=tableAlias KW_ON mergeCondition=booleanExpression matchedClause* notMatchedClause* notMatchedBySourceClause* # mergeIntoTable
;
// Entity-name wrapper rules. Each one delegates to a single identifier rule; the
// `...Create` variants have exactly the same syntax as their plain counterparts.
// NOTE(review): the distinct rule names presumably let parse-tree consumers tell a newly
// created entity from a reference to an existing one; confirm against the listener/visitor code.

// Reference to a namespace.
namespaceName
: identifierReference
;
// Name of a namespace being created.
namespaceNameCreate
: identifierReference
;
// Name of a table being created.
tableNameCreate
: tableIdentifier
;
// Reference to a table.
tableName
: tableIdentifier
;
// Name of a view being created.
viewNameCreate
: viewIdentifier
;
// Reference to a view.
viewName
: viewIdentifier
;
// Matches the empty token sequence. It is only referenced behind the semantic predicate
// `{this.shouldMatchEmpty()}?`, which is implemented outside this file.
// NOTE(review): presumably shouldMatchEmpty() lives on SQLParserBase and enables empty
// matches for tooling such as auto-completion; confirm.
emptyColumn
:
;
// Column reference: a multi-part identifier (possibly with an empty trailing part), or,
// when the predicate allows it, nothing at all.
columnName
: multipartIdentifierAllowEmpty
| {this.shouldMatchEmpty()}? emptyColumn
;
// Column path that must be a complete multi-part identifier.
columnNamePath
: multipartIdentifier
;
// Column path with the same alternatives as columnName (the empty forms are permitted).
columnNamePathAllowEmpty
: multipartIdentifierAllowEmpty
| {this.shouldMatchEmpty()}? emptyColumn
;
// Comma-separated list of column references (at least one).
columnNameSeq
: columnName (COMMA columnName)*
;
// Name of a column being defined: a single identifier.
columnNameCreate
: errorCapturingIdentifier
;
// A name given either as KW_IDENTIFIER applied to a parenthesised expression
// or as a plain multi-part identifier.
identifierReference
: KW_IDENTIFIER LEFT_PAREN expression RIGHT_PAREN
| multipartIdentifier
;
// Trailing clauses of a query, all optional, in this fixed order:
// ORDER BY, CLUSTER BY, DISTRIBUTE BY, SORT BY, window clause, limit clause, OFFSET.
// Because every part is optional, this rule can match the empty sequence.
queryOrganization
: (KW_ORDER KW_BY orderOrSortByClause)? (KW_CLUSTER KW_BY clusterOrDistributeBy)? (
KW_DISTRIBUTE KW_BY clusterOrDistributeBy
)? (KW_SORT KW_BY orderOrSortByClause)? windowClause? limitClause? (
KW_OFFSET offset=expression
)?
;
// KW_LIMIT followed by either KW_ALL or a limit expression.
limitClause
: KW_LIMIT (KW_ALL | limit=expression)
;
// Comma-separated sort items, shared by the ORDER BY and SORT BY positions.
orderOrSortByClause
: sortItem (COMMA sortItem)*
;
// Comma-separated expressions, shared by the CLUSTER BY and DISTRIBUTE BY positions.
clusterOrDistributeBy
: expression (COMMA expression)*
;
// Set-operation expression over query primaries (left-recursive).
//
// In a left-recursive rule ANTLR gives binary alternatives listed earlier a higher
// precedence. INTERSECT is therefore listed before UNION / EXCEPT / MINUS so that it
// binds tighter: `a UNION b INTERSECT c` groups as `a UNION (b INTERSECT c)`.
// Operators of equal precedence associate to the left.
//
// Do not put an alternative that lists all four operators ahead of these two: it would
// match every set operation first and flatten the precedence levels.
queryTerm
: queryPrimary
| left=queryTerm operator=KW_INTERSECT setQuantifier? right=queryTerm
| left=queryTerm operator=(KW_UNION | KW_EXCEPT | KW_MINUS) setQuantifier? right=queryTerm
;
// Operand of a set operation: a query specification, a FROM clause followed by one or
// more statement bodies, KW_TABLE with a table name, an inline VALUES list with an
// optional alias, or a parenthesised query.
queryPrimary
: querySpecification
| fromClause fromStatementBody+
| KW_TABLE tableName
| KW_VALUES expression (COMMA expression)* tableAlias
| LEFT_PAREN query RIGHT_PAREN
;
// One sort key: a column name or an expression, with optional direction (`ordering`)
// and optional null placement (`nullOrder`). columnName is tried before expression.
sortItem
: (columnName | expression) ordering=(KW_ASC | KW_DESC)? (
KW_NULLS nullOrder=(KW_LAST | KW_FIRST)
)?
;
// Body that follows a leading FROM clause: a transform or select clause with its
// optional filters, ending in queryOrganization.
fromStatementBody
: transformClause whereClause? queryOrganization
| selectClause lateralView* whereClause? aggregationClause? havingClause? windowClause? queryOrganization
;
// Regular query block: a transform or select clause followed by the optional FROM,
// lateral views, WHERE, aggregation, HAVING and window clauses.
querySpecification
: transformClause fromClause? lateralView* whereClause? aggregationClause? havingClause? windowClause?
| selectClause fromClause? lateralView* whereClause? aggregationClause? havingClause? windowClause?
;
// Script-transform clause. It opens with one of three forms (SELECT TRANSFORM(...),
// MAP ..., REDUCE ...; `kind` records which), followed by an optional input row format
// and record writer, the mandatory USING script string, an optional AS output schema
// (identifiers or typed columns, optionally parenthesised), and an optional output row
// format and record reader.
transformClause
: (
KW_SELECT kind=KW_TRANSFORM LEFT_PAREN setQuantifier? expressionSeq RIGHT_PAREN
| kind=KW_MAP setQuantifier? expressionSeq
| kind=KW_REDUCE setQuantifier? expressionSeq
) inRowFormat=rowFormat? (KW_RECORDWRITER recordWriter=stringLit)? KW_USING script=stringLit (
KW_AS (
identifierSeq
| colTypeList
| (LEFT_PAREN (identifierSeq | colTypeList) RIGHT_PAREN)
)
)? outRowFormat=rowFormat? (KW_RECORDREADER recordReader=stringLit)?
;
// KW_SELECT, any number of hints (collected in `hints`), an optional set quantifier,
// and the select list.
selectClause
: KW_SELECT (hints+=hint)* setQuantifier? selectList
;
// KW_SET followed by a list of assignments.
setClause
: KW_SET assignmentList
;
// Merge branch for matched rows: optional extra condition, then DELETE or UPDATE SET
// (either `*` or an explicit assignment list).
matchedClause
: KW_WHEN KW_MATCHED (KW_AND matchedCond=booleanExpression)? KW_THEN (
KW_DELETE
| KW_UPDATE KW_SET (ASTERISK | assignmentList)
)
;
// Merge branch for unmatched rows, with optional BY TARGET, an optional extra condition,
// and an insert action.
notMatchedClause
: KW_WHEN KW_NOT KW_MATCHED (KW_BY KW_TARGET)? (KW_AND notMatchedCond=booleanExpression)? KW_THEN notMatchedAction
;
// Merge branch for NOT MATCHED BY SOURCE: optional extra condition, then DELETE or
// UPDATE SET with an assignment list.
notMatchedBySourceClause
: KW_WHEN KW_NOT KW_MATCHED KW_BY KW_SOURCE (KW_AND notMatchedBySourceCond=booleanExpression)? KW_THEN (
KW_DELETE
| KW_UPDATE KW_SET assignmentList
)
;
// Insert action of a not-matched branch: INSERT * or INSERT (columns) VALUES (expressions).
notMatchedAction
: KW_INSERT ASTERISK
| KW_INSERT LEFT_PAREN multipartIdentifierList RIGHT_PAREN KW_VALUES LEFT_PAREN expression (
COMMA expression
)* RIGHT_PAREN
;
// Comma-separated assignments (at least one).
assignmentList
: assignment (COMMA assignment)*
;
// One assignment: multi-part identifier, EQ, expression.
assignment
: key=multipartIdentifier EQ value=expression
;
// KW_WHERE followed by a boolean expression.
whereClause
: KW_WHERE booleanExpression
;
// KW_HAVING followed by a boolean expression.
havingClause
: KW_HAVING booleanExpression
;
// Hint block delimited by the HENT_START / HENT_END tokens (names as declared in the
// lexer vocabulary). It holds one or more hint statements; the COMMA between them is optional.
hint
: HENT_START hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HENT_END
;
// One hint: a bare name, or a name with a parenthesised, comma-separated list of
// primary expressions collected in `parameters`.
hintStatement
: hintName=identifier
| hintName=identifier LEFT_PAREN parameters+=primaryExpression (
COMMA parameters+=primaryExpression
)* RIGHT_PAREN
;
// KW_FROM with one or more comma-separated relations, followed by any lateral views
// and optional pivot / unpivot clauses.
fromClause
: KW_FROM relation (COMMA relation)* lateralView* pivotClause? unPivotClause?
;
// Time-travel clause with an optional leading KW_FOR: either a version given as an
// integer or string literal, or a timestamp given as a value expression.
temporalClause
: KW_FOR? (KW_SYSTEM_VERSION | KW_VERSION) KW_AS KW_OF (INTEGER_VALUE | stringLit)
| KW_FOR? (KW_SYSTEM_TIME | KW_TIMESTAMP) KW_AS KW_OF timestamp=valueExpression
;
// GROUP BY clause in two shapes:
//   1. a comma-separated list of groupByClause items (which may contain grouping analytics)
//   2. a comma-separated list of expressions, optionally followed by WITH ROLLUP,
//      WITH CUBE, or GROUPING SETS (...); `kind` records which suffix was used
aggregationClause
: KW_GROUP KW_BY groupingExpressionsWithGroupingAnalytics+=groupByClause (
COMMA groupingExpressionsWithGroupingAnalytics+=groupByClause
)*
| KW_GROUP KW_BY groupingExpressions+=expression (COMMA groupingExpressions+=expression)* (
KW_WITH kind=KW_ROLLUP
| KW_WITH kind=KW_CUBE
| kind=KW_GROUPING KW_SETS LEFT_PAREN groupingSet (COMMA groupingSet)* RIGHT_PAREN
)?
;
// One GROUP BY item. columnName is listed first, so it is preferred whenever it matches
// the same tokens as `expression`.
groupByClause
: columnName
| groupingAnalytics
| expression
;
// ROLLUP(...) / CUBE(...) over grouping sets, or GROUPING SETS(...), whose elements may
// themselves be nested grouping analytics.
groupingAnalytics
: (KW_ROLLUP | KW_CUBE) LEFT_PAREN groupingSet (COMMA groupingSet)* RIGHT_PAREN
| KW_GROUPING KW_SETS LEFT_PAREN (groupingAnalytics | groupingSet) (
COMMA (groupingAnalytics | groupingSet)
)* RIGHT_PAREN
;
// One grouping set: a single column or expression, or a parenthesised (possibly empty)
// list of columns / expressions.
groupingSet
: columnName
| expression
| LEFT_PAREN ((columnName | expression) (COMMA (columnName | expression))*)? RIGHT_PAREN
;
// PIVOT ( <aggregates> FOR <pivot column(s)> IN ( <pivot values> ) ).
pivotClause
: KW_PIVOT LEFT_PAREN aggregates=namedExpressionSeq KW_FOR pivotColumn KW_IN LEFT_PAREN pivotValues+=pivotValue (
COMMA pivotValues+=pivotValue
)* RIGHT_PAREN RIGHT_PAREN
;
// Pivot column: a single identifier or a parenthesised identifier list; either way the
// identifiers are collected in `identifiers`.
pivotColumn
: identifiers+=identifier
| LEFT_PAREN identifiers+=identifier (COMMA identifiers+=identifier)* RIGHT_PAREN
;
// Pivot value expression with an optional alias (the AS keyword is optional).
pivotValue
: expression (KW_AS? identifier)?
;
// UNPIVOT clause: optional INCLUDE/EXCLUDE NULLS, a parenthesised single-value or
// multi-value column clause, and an optional alias.
unPivotClause
: KW_UNPIVOT ((KW_INCLUDE | KW_EXCLUDE) KW_NULLS)? LEFT_PAREN (
unPivotSingleValueColumnClause
| unPivotMultiValueColumnClause
) RIGHT_PAREN (KW_AS? identifier)?
;
// Single value column: <identifier> FOR <identifier> IN ( column [alias], ... ).
unPivotSingleValueColumnClause
: identifier KW_FOR identifier KW_IN LEFT_PAREN unPivotColumns+=unPivotColumnAndAlias (
COMMA unPivotColumns+=unPivotColumnAndAlias
)* RIGHT_PAREN
;
// Multiple value columns: ( identifiers ) FOR <identifier> IN ( column set, ... ).
unPivotMultiValueColumnClause
: LEFT_PAREN unPivotValueColumns+=identifier (COMMA unPivotValueColumns+=identifier)* RIGHT_PAREN KW_FOR identifier KW_IN LEFT_PAREN
unPivotColumnSets+=unPivotColumnSet (COMMA unPivotColumnSets+=unPivotColumnSet)* RIGHT_PAREN
;
// Parenthesised list of source columns with an optional alias for the set.
unPivotColumnSet
: LEFT_PAREN unPivotColumns+=multipartIdentifier (COMMA unPivotColumns+=multipartIdentifier)* RIGHT_PAREN (
KW_AS? identifier
)?
;
// One source column with an optional alias (the AS keyword is optional).
unPivotColumnAndAlias
: multipartIdentifier (KW_AS? identifier)?
;
// The IF NOT EXISTS keyword sequence.
ifNotExists
: KW_IF KW_NOT KW_EXISTS
;
// The IF EXISTS keyword sequence.
ifExists
: KW_IF KW_EXISTS
;
// LATERAL VIEW [OUTER] <name>( args ) <table alias> [[AS] col, ...].
// The generator name is parsed with the viewName rule; the argument list may be empty,
// and the column aliases are collected in `colName`.
lateralView
: KW_LATERAL KW_VIEW (KW_OUTER)? viewName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tableAlias (
KW_AS? colName+=identifier (COMMA colName+=identifier)*
)?
;
// Quantifier keyword: KW_DISTINCT or KW_ALL.
setQuantifier
: KW_DISTINCT
| KW_ALL
;
// A relation in a FROM clause: a bare table name, or an optionally LATERAL primary
// relation followed by any number of joins / pivot / unpivot clauses.
// A plain table name also matches the second alternative (via relationPrimary); since
// ANTLR prefers the earlier alternative, the bare-name form wins in that case.
relation
: tableName
| KW_LATERAL? relationPrimary (joinRelation | pivotClause | unPivotClause)*
;
// Right-hand side of a join: either <join type> JOIN [LATERAL] <relation> with optional
// join criteria, or NATURAL <join type> JOIN [LATERAL] <relation> without criteria.
joinRelation
: (joinType) KW_JOIN KW_LATERAL? right=relationPrimary joinCriteria?
| KW_NATURAL joinType KW_JOIN KW_LATERAL? right=relationPrimary
;
// Join type keywords. The first alternative (`KW_INNER?`) can match nothing, which is
// what allows a bare KW_JOIN.
joinType
: KW_INNER?
| KW_CROSS
| KW_LEFT KW_OUTER?
| KW_LEFT? (KW_SEMI | KW_ANTI)
| (KW_RIGHT | KW_FULL) KW_OUTER?
;
// Join condition: KW_ON with a boolean expression (or a column path, optionally compared
// with EQ to another column path; both paths may use the empty forms), or KW_USING with
// an identifier list.
joinCriteria
: KW_ON (booleanExpression | columnNamePathAllowEmpty (EQ columnNamePathAllowEmpty)?)
| KW_USING identifierList
;
// TABLESAMPLE ( [method] ) with an optional REPEATABLE ( seed ) suffix.
sample
: KW_TABLESAMPLE LEFT_PAREN sampleMethod? RIGHT_PAREN (
KW_REPEATABLE LEFT_PAREN seed=INTEGER_VALUE RIGHT_PAREN
)?
;
// Sampling method, in order: an optionally negative percentage, an expression followed
// by KW_ROWS, a `BUCKET x OUT OF y` form with an optional ON clause, or a bare
// expression labelled `bytes`.
sampleMethod
: negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) KW_PERCENTLIT
| expression KW_ROWS
| sampleType=KW_BUCKET numerator=INTEGER_VALUE KW_OUT KW_OF denominator=INTEGER_VALUE (
KW_ON (identifier | qualifiedName LEFT_PAREN RIGHT_PAREN)
)?
| bytes=expression
;
// Parenthesised identifier sequence.
identifierList
: LEFT_PAREN identifierSeq RIGHT_PAREN
;
// Comma-separated identifiers (at least one), collected in `ident`.
identifierSeq
: ident+=errorCapturingIdentifier (COMMA ident+=errorCapturingIdentifier)*
;
// Parenthesised, comma-separated list of ordered identifiers.
orderedIdentifierList
: LEFT_PAREN orderedIdentifier (COMMA orderedIdentifier)* RIGHT_PAREN
;
// Identifier with an optional ASC / DESC direction.
orderedIdentifier
: ident=errorCapturingIdentifier ordering=(KW_ASC | KW_DESC)?
;
// Parenthesised, comma-separated list of column names with optional comments.
identifierCommentList
: LEFT_PAREN identifierComment (COMMA identifierComment)* RIGHT_PAREN
;
// A newly defined column name with an optional comment string.
identifierComment
: columnNameCreate (KW_COMMENT comment=stringLit)?
;
// Primary relation, one labelled alternative per source kind: a named table/view
// (with optional temporal clause and sample), a parenthesised sub-query, a parenthesised
// relation, an inline VALUES table, or a table-valued function call.
// In the first alternative tableName, viewName and identifierReference can match the
// same tokens; ANTLR picks the first that matches, i.e. tableName.
relationPrimary
: (tableName | viewName | identifierReference) temporalClause? sample? tableAlias # tableSource
| atomSubQueryTableSource sample? tableAlias # subQueryTableSource
| LEFT_PAREN relation RIGHT_PAREN sample? tableAlias # joinTableSource
| inlineTable tableAlias # inlineTableSource
| functionTable # functionTableSource
;
// Parenthesised query used as a table source.
atomSubQueryTableSource
: LEFT_PAREN query RIGHT_PAREN
;
// Inline table: KW_VALUES followed by comma-separated expressions.
inlineTable
: KW_VALUES expression (COMMA expression)*
;
// TABLE argument of a table-valued function: a table name (bare or parenthesised) or a
// parenthesised query, each with optional partitioning.
functionTableSubqueryArgument
: KW_TABLE tableName tableArgumentPartitioning?
| KW_TABLE LEFT_PAREN tableName RIGHT_PAREN tableArgumentPartitioning?
| KW_TABLE LEFT_PAREN query RIGHT_PAREN tableArgumentPartitioning?
;
// Partitioning of a TABLE argument: WITH SINGLE PARTITION, or PARTITION/DISTRIBUTE BY one
// expression or a parenthesised list; optionally followed by ORDER/SORT BY a single sort
// item or a parenthesised list.
tableArgumentPartitioning
: (
(KW_WITH KW_SINGLE KW_PARTITION)
| (
(KW_PARTITION | KW_DISTRIBUTE) KW_BY (
((LEFT_PAREN partition+=expression (COMMA partition+=expression)* RIGHT_PAREN))
| partition+=expression
)
)
) ((KW_ORDER | KW_SORT) KW_BY ( ((LEFT_PAREN orderOrSortByClause RIGHT_PAREN) | sortItem)))?
;
// Named TABLE argument: key => TABLE ...
functionTableNamedArgumentExpression
: key=identifier FAT_ARROW table=functionTableSubqueryArgument
;
// A TABLE argument, positional or named.
functionTableReferenceArgument
: functionTableSubqueryArgument
| functionTableNamedArgumentExpression
;
// Any argument of a table-valued function: a TABLE argument or an ordinary function argument.
functionTableArgument
: functionTableReferenceArgument
| functionArgument
;
// Table-valued function call followed by a table alias (the alias rule may match nothing).
functionTable
: atomFunctionTable tableAlias
;
// The call itself: function name and a parenthesised, possibly empty, argument list.
atomFunctionTable
: functionName LEFT_PAREN (functionTableArgument (COMMA functionTableArgument)*)? RIGHT_PAREN
;
// Optional alias: [AS] <strict identifier> [(column aliases)]. The whole body is
// optional, so this rule can match the empty sequence; callers reference it without `?`.
tableAlias
: (KW_AS? alias=strictIdentifier identifierList?)?
;
// Row format clause, in two shapes:
//   1. ROW FORMAT SERDE '<name>' [WITH SERDEPROPERTIES (...)]
//   2. ROW FORMAT DELIMITED followed by optional sub-clauses in this fixed order:
//      fields terminator (with optional escape), collection items terminator,
//      map keys terminator, lines terminator, null representation
rowFormat
: KW_ROW KW_FORMAT KW_SERDE name=stringLit (KW_WITH KW_SERDEPROPERTIES props=propertyList)?
| KW_ROW KW_FORMAT KW_DELIMITED (
KW_FIELDS KW_TERMINATED KW_BY fieldsTerminatedBy=stringLit (
KW_ESCAPED KW_BY escapedBy=stringLit
)?
)? (KW_COLLECTION KW_ITEMS KW_TERMINATED KW_BY collectionItemsTerminatedBy=stringLit)? (
KW_MAP KW_KEYS KW_TERMINATED KW_BY keysTerminatedBy=stringLit
)? (KW_LINES KW_TERMINATED KW_BY linesSeparatedBy=stringLit)? (
KW_NULL KW_DEFINED KW_AS nullDefinedAs=stringLit
)?
;
// Comma-separated multi-part identifiers (at least one).
multipartIdentifierList
: multipartIdentifier (COMMA multipartIdentifier)*
;
// Dot-separated identifier path; the segments are collected in `parts`.
multipartIdentifier
: parts+=errorCapturingIdentifier (DOT parts+=errorCapturingIdentifier)*
;
// A multi-part identifier, or, when the shouldMatchEmpty() predicate allows it, an
// identifier path ending in a DOT with nothing after it (for example `t.`).
// NOTE(review): shouldMatchEmpty() is implemented outside this file; confirm its exact conditions.
multipartIdentifierAllowEmpty
: multipartIdentifier
| {this.shouldMatchEmpty()}? multipartIdentifier DOT emptyColumn
;
// Comma-separated multi-part identifiers, each with optional options.
multipartIdentifierPropertyList
: multipartIdentifierProperty (COMMA multipartIdentifierProperty)*
;
// Multi-part identifier with an optional KW_OPTIONS property list.
multipartIdentifierProperty
: multipartIdentifier (KW_OPTIONS options=propertyList)?
;
// Table identifier with an optional qualifier: [db.]table (at most two parts).
tableIdentifier
: (db=errorCapturingIdentifier DOT)? table=errorCapturingIdentifier
;
// View identifier with an optional qualifier: [db.]view (at most two parts).
viewIdentifier
: (db=errorCapturingIdentifier DOT)? view=errorCapturingIdentifier
;
// Select-list item that is a plain column reference.
selectLiteralColumnName
: columnName
;
// Select-list item that is a general expression.
selectExpressionColumnName
: expression
;
// Star projection with any number of qualifier prefixes, e.g. `*` or `t.*`.
tableAllColumns
: (qualifiedName DOT)* ASTERISK
;
// One select-list item: a star projection, a column reference, or an expression (tried
// in that order), with an optional alias or alias list. When the shouldMatchEmpty()
// predicate allows it, the item may also be empty.
namedExpression
: (tableAllColumns | selectLiteralColumnName | selectExpressionColumnName) (
KW_AS? (alias=errorCapturingIdentifier | identifierList)
)?
| {this.shouldMatchEmpty()}? emptyColumn
;
// Comma-separated named expressions (at least one).
namedExpressionSeq
: namedExpression (COMMA namedExpression)*
;
// The select list is a sequence of named expressions.
selectList
: namedExpressionSeq
;
// Parenthesised, comma-separated partition fields, collected in `fields`.
partitionFieldList
: LEFT_PAREN fields+=partitionField (COMMA fields+=partitionField)* RIGHT_PAREN
;
// A partition field is either a transform or a column type definition.
partitionField
: transform
| columnType
;
// Partition transform: a plain qualified name, or a named transform applied to a
// parenthesised argument list.
transform
: qualifiedName
| transformName=identifier LEFT_PAREN transformArgument (COMMA transformArgument)* RIGHT_PAREN
;
// Transform argument: a qualified name or a constant.
transformArgument
: qualifiedName
| constant
;
// Top of the expression hierarchy; delegates to booleanExpression.
expression
: booleanExpression
;
// Named argument: key => value.
namedArgumentExpression
: key=identifier FAT_ARROW value=expression
;
// Function argument: positional expression or named argument.
functionArgument
: expression
| namedArgumentExpression
;
// Comma-separated expressions (at least one).
expressionSeq
: expression (COMMA expression)*
;
// Boolean expression (left-recursive). Of the binary alternatives, AND is listed before
// OR, so under ANTLR's left-recursion rules AND binds tighter than OR. The non-recursive
// forms are negation, an EXISTS sub-query, and a value expression with an optional predicate.
booleanExpression
: (KW_NOT | NOT) booleanExpression
| KW_EXISTS LEFT_PAREN query RIGHT_PAREN
| valueExpression predicate?
| left=booleanExpression operator=KW_AND right=booleanExpression
| left=booleanExpression operator=KW_OR right=booleanExpression
;
// Predicate suffix applied to a value expression. `kind` holds the keyword token that
// identifies the predicate; most forms accept an optional KW_NOT.
// Forms: BETWEEN .. AND .., IN (expression list), IN (sub-query), RLIKE / REGEXP,
// LIKE / ILIKE with ANY / SOME / ALL over a (possibly empty) list, LIKE / ILIKE with a
// pattern and optional ESCAPE, IS [NOT] NULL, IS [NOT] TRUE / FALSE / UNKNOWN,
// IS [NOT] DISTINCT FROM.
predicate
: KW_NOT? kind=KW_BETWEEN lower=valueExpression KW_AND upper=valueExpression
| KW_NOT? kind=KW_IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
| KW_NOT? kind=KW_IN LEFT_PAREN query RIGHT_PAREN
| KW_NOT? kind=(KW_RLIKE | KW_REGEXP) pattern=valueExpression
| KW_NOT? kind=(KW_LIKE | KW_ILIKE) quantifier=(KW_ANY | KW_SOME | KW_ALL) (
LEFT_PAREN RIGHT_PAREN
| LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
)
| KW_NOT? kind=(KW_LIKE | KW_ILIKE) pattern=valueExpression (KW_ESCAPE escapeChar=stringLit)?
| KW_IS KW_NOT? kind=KW_NULL
| KW_IS KW_NOT? kind=(KW_TRUE | KW_FALSE | KW_UNKNOWN)
| KW_IS KW_NOT? kind=KW_DISTINCT KW_FROM right=valueExpression
;
// Arithmetic / bitwise / comparison expression (left-recursive). The order of the binary
// alternatives defines precedence, highest first: multiplicative (* / % DIV),
// additive and concat (+ - ||), bitwise AND, bitwise XOR (HAT), bitwise OR (PIPE),
// and finally the comparison operators.
valueExpression
: primaryExpression
| operator=(MINUS | PLUS | TILDE) valueExpression
| left=valueExpression operator=(ASTERISK | SLASH | PERCENT | KW_DIV) right=valueExpression
| left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression
| left=valueExpression operator=AMPERSAND right=valueExpression
| left=valueExpression operator=HAT right=valueExpression
| left=valueExpression operator=PIPE right=valueExpression
| left=valueExpression comparisonOperator right=valueExpression
;
// Date/time unit keywords accepted as the `unit` argument of the timestamp add / diff
// function forms in primaryExpression.
datetimeUnit
: KW_YEAR
| KW_QUARTER
| KW_MONTH
| KW_WEEK
| KW_DAY
| KW_DAYOFYEAR
| KW_HOUR
| KW_MINUTE
| KW_SECOND
| KW_MILLISECOND
| KW_MICROSECOND
;
// Atomic and postfix expressions (left-recursive through the subscript and field-access
// alternatives). Alternative order matters: the special-syntax function forms
// (timestamp add/diff, CAST, STRUCT, FIRST, LAST, POSITION, ...) are listed before the
// generic `functionName ( ... )` call, and the generic call is listed before the plain
// column path.
primaryExpression
: name=(KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_CURRENT_USER | KW_USER | KW_SESSION_USER)
// Timestamp add / diff: the unit is a datetimeUnit keyword; a string literal is also
// accepted and captured under the `invalidUnit` label.
| name=(KW_TIMESTAMPADD | KW_DATEADD | KW_DATE_ADD) LEFT_PAREN (
unit=datetimeUnit
| invalidUnit=stringLit
) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN
| name=(KW_TIMESTAMPDIFF | KW_DATEDIFF | KW_DATE_DIFF | KW_TIMEDIFF) LEFT_PAREN (
unit=datetimeUnit
| invalidUnit=stringLit
) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN
// CASE without an operand, then CASE with an operand expression.
| KW_CASE whenClause+ (KW_ELSE elseExpression=expression)? KW_END
| KW_CASE expression whenClause+ (KW_ELSE elseExpression=expression)? KW_END
| name=(KW_CAST | KW_TRY_CAST) LEFT_PAREN expression KW_AS dataType RIGHT_PAREN
| KW_STRUCT LEFT_PAREN (namedExpression (COMMA namedExpression)*)? RIGHT_PAREN
| KW_FIRST LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN
| KW_ANY_VALUE LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN
| KW_LAST LEFT_PAREN expression (KW_IGNORE KW_NULLS)? RIGHT_PAREN
| KW_POSITION LEFT_PAREN substr=valueExpression KW_IN str=valueExpression RIGHT_PAREN
| constant
| ASTERISK
| qualifiedName DOT ASTERISK
// Parenthesised list of two or more named expressions (note the `+` on the COMMA group).
| LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN
| LEFT_PAREN query RIGHT_PAREN
| KW_IDENTIFIER LEFT_PAREN expression RIGHT_PAREN
// Generic function call with optional FILTER, IGNORE/RESPECT NULLS and OVER suffixes.
| functionName LEFT_PAREN (setQuantifier? functionArgument (COMMA functionArgument)*)? RIGHT_PAREN (
KW_FILTER LEFT_PAREN KW_WHERE where=booleanExpression RIGHT_PAREN
)? (nullsOption=(KW_IGNORE | KW_RESPECT) KW_NULLS)? (KW_OVER windowSpec)?
// Lambda with one parameter, then lambda with a parenthesised list of two or more parameters.
| identifier ARROW expression
| LEFT_PAREN identifier (COMMA identifier)+ RIGHT_PAREN ARROW expression
| value=primaryExpression LEFT_BRACKET index=valueExpression RIGHT_BRACKET
| columnNamePath
| base=primaryExpression DOT fieldName=identifier
| LEFT_PAREN expression RIGHT_PAREN
| KW_EXTRACT LEFT_PAREN field=identifier KW_FROM source=valueExpression RIGHT_PAREN
// SUBSTR / SUBSTRING accept either the keyword form (FROM .. FOR ..) or comma-separated arguments.
| (KW_SUBSTR | KW_SUBSTRING) LEFT_PAREN str=valueExpression (KW_FROM | COMMA) pos=valueExpression (
(KW_FOR | COMMA) len=valueExpression
)? RIGHT_PAREN
| KW_TRIM LEFT_PAREN trimOption=(KW_BOTH | KW_LEADING | KW_TRAILING)? (
trimStr=valueExpression
)? KW_FROM srcStr=valueExpression RIGHT_PAREN
| KW_OVERLAY LEFT_PAREN input=valueExpression KW_PLACING replace=valueExpression KW_FROM position=valueExpression (
KW_FOR length=valueExpression
)? RIGHT_PAREN
// Percentile functions with the WITHIN GROUP (ORDER BY ...) syntax and optional FILTER / OVER.
| name=(KW_PERCENTILE_CONT | KW_PERCENTILE_DISC) LEFT_PAREN percentage=valueExpression RIGHT_PAREN KW_WITHIN KW_GROUP LEFT_PAREN KW_ORDER KW_BY
sortItem RIGHT_PAREN (KW_FILTER LEFT_PAREN KW_WHERE where=booleanExpression RIGHT_PAREN)? (
KW_OVER windowSpec
)?
;
literalType
: KW_DATE
| KW_TIMESTAMP
| KW_TIMESTAMP_LTZ