127127
def read_tokens_name_before_token_from_lexer_file(filepath: str, token: str) -> list[str]:
    """Collect the lexer token names declared before *token* in a grammar file.

    Scans *filepath* (an ANTLR-style lexer grammar) line by line and returns
    the names of token rules — UPPER_CASE identifiers followed by ``:`` — that
    appear before the rule named *token*.  The target rule itself is excluded.

    When BOTH the start and stop placeholder comments are present in the file,
    only the region between them is scanned; otherwise the whole file is
    scanned.  Requiring both markers avoids silently truncating the result
    when only one marker happens to occur.

    :param filepath: path to the lexer grammar file to read.
    :param token: name of the token rule at which collection stops.
    :return: token-rule names found before *token*, in file order.
    """
    # Compiled once and anchored at line start; a rule name may be indented.
    token_rule = re.compile(r"^\s*(?P<token_name>[A-Z_][A-Z_0-9]*)\s*:")
    start_placeholder = "Build id contains the non reserved keywords start."
    stop_placeholder = "Build id contains the non reserved keywords stop."

    with open(filepath, "r") as lexer_file:
        lines = lexer_file.readlines()

    has_placeholder = any(start_placeholder in line for line in lines) and any(
        stop_placeholder in line for line in lines
    )
    # Without a delimited region, every line is eligible from the start.
    collecting = not has_placeholder

    tokens_name_before_token: list[str] = []
    for line in lines:
        if has_placeholder and start_placeholder in line:
            collecting = True
            continue
        if line.isspace() or not collecting:
            continue
        if has_placeholder and stop_placeholder in line:
            break
        # Only the first rule name on a line matters; '^' pins the match to
        # the line start, so a single search() is the "first match".
        match = token_rule.search(line)
        if match is None:
            continue
        name = match.group("token_name")
        if name == token:
            break  # Reached the target token: stop collecting (exclusive).
        tokens_name_before_token.append(name)
    return tokens_name_before_token
153160
154161def pretty_print (tokens : list [str ], hello : str | None ) -> None :
@@ -197,9 +204,10 @@ def append_non_reserved_token_to_rules_in_parser(parser_file_path: str, append_r
197204
198205
199206def get_content_by_token_name (content : str , token_name : str ) -> str :
200- token_regex = r"^(%s)\s*:[.\s\S]*?;" % token_name
201- # Get the content of the rules_regex match.
202- token_content = re .search (token_regex , content , re .MULTILINE )
207+ # Support upstream grammar formatting where comments may appear between
208+ # rule name and ':' and where rules can be indented.
209+ token_regex = r"(?ms)^\s*(%s)\s*(?:\n\s*//[^\n]*)*\n\s*:[\s\S]*?\n\s*;" % re .escape (token_name )
210+ token_content = re .search (token_regex , content )
203211 if token_content :
204212 return token_content .group (0 )
205213 return None
@@ -214,4 +222,3 @@ def get_content_by_token_name(content: str, token_name: str) -> str:
214222 pretty_print (filtered_tokens , "Tokens before ID token without reserved keywords:" )
215223 append_non_reserved_token_to_rules_in_parser ("SnowflakeParser.g4" , "id_" , "supplement_non_reserved_words" , filtered_tokens )
216224
217-
0 commit comments