@@ -8093,6 +8093,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
80938093 return whitespace;
80948094}
80958095
8096+ /**
8097+ * Lex past the delimiter of a percent literal. Handle newlines and heredocs
8098+ * appropriately.
8099+ */
8100+ static uint8_t
8101+ pm_lex_percent_delimiter(pm_parser_t *parser) {
8102+ size_t eol_length = match_eol(parser);
8103+
8104+ if (eol_length) {
8105+ if (parser->heredoc_end) {
8106+ // If we have already lexed a heredoc, then the newline has already
8107+ // been added to the list. In this case we want to just flush the
8108+ // heredoc end.
8109+ parser_flush_heredoc_end(parser);
8110+ } else {
8111+ // Otherwise, we'll add the newline to the list of newlines.
8112+ pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
8113+ }
8114+
8115+ const uint8_t delimiter = *parser->current.end;
8116+ parser->current.end += eol_length;
8117+
8118+ return delimiter;
8119+ }
8120+
8121+ return *parser->current.end++;
8122+ }
8123+
80968124/**
80978125 * This is a convenience macro that will set the current token type, call the
80988126 * lex callback, and then return from the parser_lex function.
@@ -9049,15 +9077,8 @@ parser_lex(pm_parser_t *parser) {
90499077 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
90509078 }
90519079
9052- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9053-
9054- size_t eol_length = match_eol(parser);
9055- if (eol_length) {
9056- parser->current.end += eol_length;
9057- pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9058- } else {
9059- parser->current.end++;
9060- }
9080+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9081+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
90619082
90629083 if (parser->current.end < parser->end) {
90639084 LEX(PM_TOKEN_STRING_BEGIN);
@@ -9077,7 +9098,7 @@ parser_lex(pm_parser_t *parser) {
90779098 parser->current.end++;
90789099
90799100 if (parser->current.end < parser->end) {
9080- lex_mode_push_list(parser, false, * parser->current.end++ );
9101+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter( parser) );
90819102 } else {
90829103 lex_mode_push_list_eof(parser);
90839104 }
@@ -9088,7 +9109,7 @@ parser_lex(pm_parser_t *parser) {
90889109 parser->current.end++;
90899110
90909111 if (parser->current.end < parser->end) {
9091- lex_mode_push_list(parser, true, * parser->current.end++ );
9112+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter( parser) );
90929113 } else {
90939114 lex_mode_push_list_eof(parser);
90949115 }
@@ -9099,9 +9120,8 @@ parser_lex(pm_parser_t *parser) {
90999120 parser->current.end++;
91009121
91019122 if (parser->current.end < parser->end) {
9102- lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9103- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9104- parser->current.end++;
9123+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9124+ lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91059125 } else {
91069126 lex_mode_push_regexp(parser, '\0', '\0');
91079127 }
@@ -9112,9 +9132,8 @@ parser_lex(pm_parser_t *parser) {
91129132 parser->current.end++;
91139133
91149134 if (parser->current.end < parser->end) {
9115- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9116- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9117- parser->current.end++;
9135+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9136+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91189137 } else {
91199138 lex_mode_push_string_eof(parser);
91209139 }
@@ -9125,9 +9144,8 @@ parser_lex(pm_parser_t *parser) {
91259144 parser->current.end++;
91269145
91279146 if (parser->current.end < parser->end) {
9128- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9129- pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9130- parser->current.end++;
9147+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9148+ lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91319149 } else {
91329150 lex_mode_push_string_eof(parser);
91339151 }
@@ -9138,9 +9156,9 @@ parser_lex(pm_parser_t *parser) {
91389156 parser->current.end++;
91399157
91409158 if (parser->current.end < parser->end) {
9141- lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9159+ const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9160+ lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91429161 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
9143- parser->current.end++;
91449162 } else {
91459163 lex_mode_push_string_eof(parser);
91469164 }
@@ -9151,7 +9169,7 @@ parser_lex(pm_parser_t *parser) {
91519169 parser->current.end++;
91529170
91539171 if (parser->current.end < parser->end) {
9154- lex_mode_push_list(parser, false, * parser->current.end++ );
9172+ lex_mode_push_list(parser, false, pm_lex_percent_delimiter( parser) );
91559173 } else {
91569174 lex_mode_push_list_eof(parser);
91579175 }
@@ -9162,7 +9180,7 @@ parser_lex(pm_parser_t *parser) {
91629180 parser->current.end++;
91639181
91649182 if (parser->current.end < parser->end) {
9165- lex_mode_push_list(parser, true, * parser->current.end++ );
9183+ lex_mode_push_list(parser, true, pm_lex_percent_delimiter( parser) );
91669184 } else {
91679185 lex_mode_push_list_eof(parser);
91689186 }
@@ -9173,8 +9191,8 @@ parser_lex(pm_parser_t *parser) {
91739191 parser->current.end++;
91749192
91759193 if (parser->current.end < parser->end) {
9176- lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(* parser->current.end) );
9177- parser->current.end++ ;
9194+ const uint8_t delimiter = pm_lex_percent_delimiter( parser);
9195+ lex_mode_push_string( parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter)) ;
91789196 } else {
91799197 lex_mode_push_string_eof(parser);
91809198 }
0 commit comments