Skip to content

Commit 76a773a

Browse files
h3n4l and claude committed
feat(cosmosdb): improve grammar compatibility with coalesce, FROM rework, and more
Add missing CosmosDB SQL syntax features validated against official docs, reference grammars (Azure ANTLR, Vercel PEG), and the CosmosDB emulator. Lexer: ESCAPE, ARRAY, ROOT, RANK keywords; <> and ?? operators; -- line comments; Infinity/NaN constants (case-sensitive). Parser: ?? coalesce binding just below OR and above the ternary ?: (its alternative follows OR in scalar_expression); ARRAY(subquery); LIKE ESCAPE; parameter in bracket access c[@param]; <> inequality. FROM clause: reworked to support ROOT, subroot paths (container.property), IN iteration (s IN container.sizes), subqueries, flexible JOINs. Keywords as property names: 17 keywords usable after dot (c.select, c.from, etc.). ORDER BY RANK for full-text search syntax. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6c474df commit 76a773a

18 files changed

Lines changed: 1901 additions & 965 deletions

cosmosdb/CosmosDBLexer.g4

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ EXISTS_SYMBOL: 'EXISTS';
6161
LIKE_SYMBOL: 'LIKE';
6262
HAVING_SYMBOL: 'HAVING';
6363
JOIN_SYMBOL: 'JOIN';
64+
ESCAPE_SYMBOL: 'ESCAPE';
65+
ARRAY_SYMBOL: 'ARRAY';
66+
ROOT_SYMBOL: 'ROOT';
67+
RANK_SYMBOL: 'RANK';
6468

6569
AT_SYMBOL: '@';
6670
LC_BRACKET_SYMBOL: '{';
@@ -73,6 +77,7 @@ SINGLE_QUOTE_SYMBOL: '\'';
7377
DOUBLE_QUOTE_SYMBOL: '"';
7478
COMMA_SYMBOL: ',';
7579
DOT_SYMBOL: '.';
80+
DOUBLE_QUESTION_MARK_SYMBOL: '??';
7681
QUESTION_MARK_SYMBOL: '?';
7782
COLON_SYMBOL: ':';
7883
PLUS_SYMBOL: '+';
@@ -93,15 +98,22 @@ LEFT_SHIFT_OPERATOR: '<<';
9398
RIGHT_SHIFT_OPERATOR: '>>';
9499
ZERO_FILL_RIGHT_SHIFT_OPERATOR: '>>>';
95100
NOT_EQUAL_OPERATOR: '!=';
101+
NOT_EQUAL_OPERATOR_2: '<>';
96102

97103

104+
/* Constants */
105+
INFINITY_SYMBOL options { caseInsensitive = false; }: 'Infinity';
106+
NAN_SYMBOL options { caseInsensitive = false; }: 'NaN';
107+
98108
/* Identifiers */
99109
IDENTIFIER: [a-z_] [a-z_0-9]*;
100110

101111
// White space handling
102112
WHITESPACE:
103113
[ \t\f\r\n] -> channel(HIDDEN); // Ignore whitespaces.
104114

115+
LINE_COMMENT: '--' ~[\r\n]* -> channel(HIDDEN);
116+
105117
// Decimal literal.
106118
fragment DEC_DIGIT: [0-9];
107119
fragment DEC_DOT_DEC: (

cosmosdb/CosmosDBParser.g4

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,30 @@ order_by_clause:
3434
COMMA_SYMBOL sort_expression
3535
)*;
3636

37-
sort_expression: scalar_expression (ASC_SYMBOL | DESC_SYMBOL)?;
37+
sort_expression:
38+
scalar_expression (ASC_SYMBOL | DESC_SYMBOL)?
39+
| RANK_SYMBOL scalar_expression;
3840

3941
offset_limit_clause: OFFSET_SYMBOL DECIMAL LIMIT_SYMBOL DECIMAL;
4042

41-
from_specification: from_source;
43+
from_specification: from_source (join_clause)*;
4244

43-
from_source: container_expression (join_clause)*;
45+
from_source:
46+
container_expression (AS_SYMBOL? identifier)?
47+
| identifier IN_SYMBOL container_expression;
4448

45-
container_expression: container_name (AS_SYMBOL? identifier)?;
49+
container_expression:
50+
ROOT_SYMBOL
51+
| container_name
52+
| container_expression DOT_SYMBOL property_name
53+
| container_expression LS_BRACKET_SYMBOL (
54+
DOUBLE_QUOTE_STRING_LITERAL
55+
| SINGLE_QUOTE_STRING_LITERAL
56+
| array_index
57+
) RS_BRACKET_SYMBOL
58+
| LR_BRACKET_SYMBOL select RR_BRACKET_SYMBOL;
4659

47-
join_clause:
48-
JOIN_SYMBOL identifier IN_SYMBOL scalar_expression;
60+
join_clause: JOIN_SYMBOL from_source;
4961

5062
container_name: identifier;
5163

@@ -70,11 +82,13 @@ scalar_expression:
7082
| LR_BRACKET_SYMBOL scalar_expression RR_BRACKET_SYMBOL
7183
| LR_BRACKET_SYMBOL select RR_BRACKET_SYMBOL
7284
| EXISTS_SYMBOL LR_BRACKET_SYMBOL select RR_BRACKET_SYMBOL
85+
| ARRAY_SYMBOL LR_BRACKET_SYMBOL select RR_BRACKET_SYMBOL
7386
| scalar_expression DOT_SYMBOL property_name
7487
| scalar_expression LS_BRACKET_SYMBOL (
7588
DOUBLE_QUOTE_STRING_LITERAL
7689
| SINGLE_QUOTE_STRING_LITERAL
7790
| array_index
91+
| parameter_name
7892
) RS_BRACKET_SYMBOL
7993
| unary_operator scalar_expression
8094
| NOT_SYMBOL scalar_expression
@@ -90,9 +104,10 @@ scalar_expression:
90104
scalar_expression (COMMA_SYMBOL scalar_expression)*
91105
)? RR_BRACKET_SYMBOL
92106
| scalar_expression NOT_SYMBOL? BETWEEN_SYMBOL scalar_expression AND_SYMBOL scalar_expression
93-
| scalar_expression NOT_SYMBOL? LIKE_SYMBOL scalar_expression
107+
| scalar_expression NOT_SYMBOL? LIKE_SYMBOL scalar_expression (ESCAPE_SYMBOL scalar_expression)?
94108
| scalar_expression AND_SYMBOL scalar_expression
95109
| scalar_expression OR_SYMBOL scalar_expression
110+
| scalar_expression DOUBLE_QUESTION_MARK_SYMBOL scalar_expression
96111
| scalar_expression QUESTION_MARK_SYMBOL scalar_expression COLON_SYMBOL scalar_expression;
97112

98113
create_array_expression:
@@ -139,6 +154,7 @@ shift_operator:
139154
comparison_operator:
140155
EQUAL_SYMBOL
141156
| NOT_EQUAL_OPERATOR
157+
| NOT_EQUAL_OPERATOR_2
142158
| LESS_THAN_OPERATOR
143159
| LESS_THAN_EQUAL_OPERATOR
144160
| GREATER_THAN_OPERATOR
@@ -162,7 +178,7 @@ null_constant: NULL_SYMBOL;
162178

163179
boolean_constant: TRUE_SYMBOL | FALSE_SYMBOL;
164180

165-
number_constant: decimal_literal | hexadecimal_literal;
181+
number_constant: decimal_literal | hexadecimal_literal | INFINITY_SYMBOL | NAN_SYMBOL;
166182

167183
string_constant: string_literal;
168184

@@ -192,9 +208,31 @@ identifier:
192208
| EXISTS_SYMBOL
193209
| LIKE_SYMBOL
194210
| HAVING_SYMBOL
195-
| JOIN_SYMBOL;
196-
197-
property_name: identifier;
211+
| JOIN_SYMBOL
212+
| ESCAPE_SYMBOL
213+
| ARRAY_SYMBOL
214+
| ROOT_SYMBOL
215+
| RANK_SYMBOL;
216+
217+
property_name:
218+
identifier
219+
| SELECT_SYMBOL
220+
| FROM_SYMBOL
221+
| WHERE_SYMBOL
222+
| NOT_SYMBOL
223+
| AND_SYMBOL
224+
| OR_SYMBOL
225+
| AS_SYMBOL
226+
| TRUE_SYMBOL
227+
| FALSE_SYMBOL
228+
| NULL_SYMBOL
229+
| UNDEFINED_SYMBOL
230+
| UDF_SYMBOL
231+
| DISTINCT_SYMBOL
232+
| ARRAY_SYMBOL
233+
| ROOT_SYMBOL
234+
| ESCAPE_SYMBOL
235+
| RANK_SYMBOL;
198236

199237
array_index: DECIMAL;
200238

cosmosdb/cosmosdb_lexer.go

Lines changed: 340 additions & 295 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cosmosdb/cosmosdb_parser.go

Lines changed: 1486 additions & 659 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT c.name, ARRAY(SELECT VALUE t FROM t IN c.tags WHERE t.active = true) AS activeTags FROM c
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT c.id, c[@fieldName] AS fieldValue FROM c WHERE c[@filterField] = @filterValue
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT c.name ?? "unknown" AS displayName, c.nickname ?? c.name ?? "anonymous" AS nick FROM c
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT VALUE s FROM s IN products.sizes WHERE s.key = 'l'

cosmosdb/examples/from_root.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT * FROM ROOT r WHERE r.id = '1'
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SELECT sub.name FROM (SELECT c.name, c.city FROM c) AS sub

0 commit comments

Comments
 (0)