Skip to content

Commit e74b250

Browse files
committed
feat(snowflake): sync upstream grammar and fix task scripting parse
1 parent effef73 commit e74b250

11 files changed

+116219
-79953
lines changed

snowflake/SnowflakeLexer.g4

Lines changed: 1222 additions & 1149 deletions
Large diffs are not rendered by default.

snowflake/SnowflakeParser.g4

Lines changed: 2259 additions & 1595 deletions
Large diffs are not rendered by default.

snowflake/build_id_contains_non_reserved_keywords.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,28 +127,35 @@
127127

128128
def read_tokens_name_before_token_from_lexer_file(filepath: str, token: str) -> list[str]:
    """Return the lexer token names that appear before *token* in a .g4 lexer file.

    Only the region between the start/stop placeholder comments is scanned when
    both placeholders are present; otherwise the whole file is scanned (supports
    upstream lexer files that do not carry the placeholders).

    Scanning stops as soon as *token* itself is found, so the returned list
    contains exactly the token names that precede it.
    """
    # Compiled once: a token rule name at the start of a (possibly indented)
    # line, followed by ':'. Only the first match per line is relevant.
    token_pattern = re.compile(r"^\s*(?P<token_name>[A-Z_][A-Z_0-9]*)\s*:")
    start_placeholder = "Build id contains the non reserved keywords start."
    stop_placeholder = "Build id contains the non reserved keywords stop."
    tokens_name_before_token: list[str] = []
    with open(filepath, "r") as lexer_file:
        lines = lexer_file.readlines()
    # Fall back to scanning the whole file when the placeholder pair is absent.
    has_placeholder = any(start_placeholder in line for line in lines) and any(
        stop_placeholder in line for line in lines
    )
    begin = not has_placeholder
    for line in lines:
        if has_placeholder and start_placeholder in line:
            begin = True
            continue
        if line.isspace() or not begin:
            continue
        if has_placeholder and stop_placeholder in line:
            break
        # re.match is equivalent to taking only the first anchored finditer
        # match on a single line, without the manual enumerate/break dance.
        match = token_pattern.match(line)
        if match is None:
            continue
        name = match.group("token_name")
        if name == token:
            # Target token reached: everything collected so far precedes it.
            break
        tokens_name_before_token.append(name)
    return tokens_name_before_token
153160

154161
def pretty_print(tokens: list[str], hello: str | None) -> None:
@@ -197,9 +204,10 @@ def append_non_reserved_token_to_rules_in_parser(parser_file_path: str, append_r
197204

198205

199206
def get_content_by_token_name(content: str, token_name: str) -> str:
200-
token_regex = r"^(%s)\s*:[.\s\S]*?;" % token_name
201-
# Get the content of the rules_regex match.
202-
token_content = re.search(token_regex, content, re.MULTILINE)
207+
# Support upstream grammar formatting where comments may appear between
208+
# rule name and ':' and where rules can be indented.
209+
token_regex = r"(?ms)^\s*(%s)\s*(?:\n\s*//[^\n]*)*\n\s*:[\s\S]*?\n\s*;" % re.escape(token_name)
210+
token_content = re.search(token_regex, content)
203211
if token_content:
204212
return token_content.group(0)
205213
return None
@@ -214,4 +222,3 @@ def get_content_by_token_name(content: str, token_name: str) -> str:
214222
pretty_print(filtered_tokens, "Tokens before ID token without reserved keywords:")
215223
append_non_reserved_token_to_rules_in_parser("SnowflakeParser.g4", "id_", "supplement_non_reserved_words", filtered_tokens)
216224

217-

snowflake/examples/create_pipe.sql

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
-- Examples of CREATE PIPE statements used to exercise the Snowflake grammar.

-- internal stages
-- named stage without schema
create or replace pipe P1 auto_ingest=false as COPY INTO TABLE1 FROM @STAGE1;
-- named stage with schema
create or replace pipe P2 auto_ingest=false as COPY INTO SCH1.TABLE1 FROM @SCH1.STAGE1;
-- named stage with schema and path
create or replace pipe P3 auto_ingest=false as COPY INTO TABLE1 FROM @SCH1.STAGE1/dir1/dir2/;
create or replace pipe P4 auto_ingest=false as COPY INTO TABLE1 FROM @SCH1.STAGE1/;

-- snowflake docs examples
create pipe mypipe as copy into mytable from @mystage;

create pipe mypipe2 as copy into mytable(C1, C2) from (select $5, $4 from @mystage);

create pipe mypipe3 as copy into mytable(C1, C2) from (select $5, $4 from @mysch.mystage/x/y/z);

-- auto-ingest pipe with an AWS SNS notification topic
create pipe mypipe_s3
  auto_ingest = true
  aws_sns_topic = 'arn:aws:sns:us-west-2:001234567890:s3_mybucket'
  as
  copy into snowpipe_db.public.mytable
  from @snowpipe_db.public.mystage
  file_format = (type = 'JSON');

-- auto-ingest pipe with a GCS notification integration
create pipe mypipe_gcs
  auto_ingest = true
  integration = 'MYINT'
  as
  copy into snowpipe_db.public.mytable
  from @snowpipe_db.public.mystage
  file_format = (type = 'JSON');

-- auto-ingest pipe with an Azure notification integration
create pipe mypipe_azure
  auto_ingest = true
  integration = 'MYINT'
  as
  copy into snowpipe_db.public.mytable
  from @snowpipe_db.public.mystage
  file_format = (type = 'JSON');
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
-- Examples of CREATE TASK statements whose body is a Snowflake Scripting
-- block, used to exercise task scripting parsing.

-- task body: anonymous BEGIN ... END block
CREATE OR REPLACE TASK test_logging
  USER_TASK_MANAGED_INITIAL_WAREHOUSE_SIZE = 'XSMALL'
  SCHEDULE = 'USING CRON 0 * * * * America/Los_Angeles'
AS
BEGIN
  ALTER SESSION SET TIMESTAMP_OUTPUT_FORMAT = 'YYYY-MM-DD HH24:MI:SS.FF';
  SELECT CURRENT_TIMESTAMP;
END;

-- task body: DECLARE ... BEGIN ... END block with local variables and RETURN
CREATE TASK t1
  USER_TASK_MANAGED_INITIAL_WAREHOUSE_SIZE = 'XSMALL'
  SCHEDULE = '15 SECONDS'
AS
DECLARE
  radius_of_circle float;
  area_of_circle float;
BEGIN
  radius_of_circle := 3;
  area_of_circle := pi() * radius_of_circle * radius_of_circle;
  return area_of_circle;
END;

0 commit comments

Comments
 (0)