Skip to content

Commit 78aa15a

Browse files
authored
Merge pull request #2141 from ruby/fix-assertion-error
Fix assertion on spanning heredocs
2 parents a81b4ce + e190308 commit 78aa15a

6 files changed

Lines changed: 223 additions & 50 deletions

File tree

include/prism/util/pm_newline_list.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,6 @@ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t
7272
*/
7373
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
7474

75-
/**
76-
* Conditionally append a new offset to the newline list, if the value passed in
77-
* is a newline.
78-
*
79-
* @param list The list to append to.
80-
* @param cursor A pointer to the offset to append.
81-
* @return True if the reallocation of the offsets succeeds (if one was
82-
* necessary), otherwise false.
83-
*/
84-
bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor);
85-
8675
/**
8776
* Returns the line and column of the given offset. If the offset is not in the
8877
* list, the line and column of the closest offset less than the given offset

src/prism.c

Lines changed: 44 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8093,6 +8093,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
80938093
return whitespace;
80948094
}
80958095

8096+
/**
8097+
* Lex past the delimiter of a percent literal. Handle newlines and heredocs
8098+
* appropriately.
8099+
*/
8100+
static uint8_t
8101+
pm_lex_percent_delimiter(pm_parser_t *parser) {
8102+
size_t eol_length = match_eol(parser);
8103+
8104+
if (eol_length) {
8105+
if (parser->heredoc_end) {
8106+
// If we have already lexed a heredoc, then the newline has already
8107+
// been added to the list. In this case we want to just flush the
8108+
// heredoc end.
8109+
parser_flush_heredoc_end(parser);
8110+
} else {
8111+
// Otherwise, we'll add the newline to the list of newlines.
8112+
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
8113+
}
8114+
8115+
const uint8_t delimiter = *parser->current.end;
8116+
parser->current.end += eol_length;
8117+
8118+
return delimiter;
8119+
}
8120+
8121+
return *parser->current.end++;
8122+
}
8123+
80968124
/**
80978125
* This is a convenience macro that will set the current token type, call the
80988126
* lex callback, and then return from the parser_lex function.
@@ -9049,15 +9077,8 @@ parser_lex(pm_parser_t *parser) {
90499077
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
90509078
}
90519079

9052-
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9053-
9054-
size_t eol_length = match_eol(parser);
9055-
if (eol_length) {
9056-
parser->current.end += eol_length;
9057-
pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
9058-
} else {
9059-
parser->current.end++;
9060-
}
9080+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9081+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
90619082

90629083
if (parser->current.end < parser->end) {
90639084
LEX(PM_TOKEN_STRING_BEGIN);
@@ -9077,7 +9098,7 @@ parser_lex(pm_parser_t *parser) {
90779098
parser->current.end++;
90789099

90799100
if (parser->current.end < parser->end) {
9080-
lex_mode_push_list(parser, false, *parser->current.end++);
9101+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
90819102
} else {
90829103
lex_mode_push_list_eof(parser);
90839104
}
@@ -9088,7 +9109,7 @@ parser_lex(pm_parser_t *parser) {
90889109
parser->current.end++;
90899110

90909111
if (parser->current.end < parser->end) {
9091-
lex_mode_push_list(parser, true, *parser->current.end++);
9112+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
90929113
} else {
90939114
lex_mode_push_list_eof(parser);
90949115
}
@@ -9099,9 +9120,8 @@ parser_lex(pm_parser_t *parser) {
90999120
parser->current.end++;
91009121

91019122
if (parser->current.end < parser->end) {
9102-
lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9103-
pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9104-
parser->current.end++;
9123+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9124+
lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91059125
} else {
91069126
lex_mode_push_regexp(parser, '\0', '\0');
91079127
}
@@ -9112,9 +9132,8 @@ parser_lex(pm_parser_t *parser) {
91129132
parser->current.end++;
91139133

91149134
if (parser->current.end < parser->end) {
9115-
lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9116-
pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9117-
parser->current.end++;
9135+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9136+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91189137
} else {
91199138
lex_mode_push_string_eof(parser);
91209139
}
@@ -9125,9 +9144,8 @@ parser_lex(pm_parser_t *parser) {
91259144
parser->current.end++;
91269145

91279146
if (parser->current.end < parser->end) {
9128-
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9129-
pm_newline_list_check_append(&parser->newline_list, parser->current.end);
9130-
parser->current.end++;
9147+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9148+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91319149
} else {
91329150
lex_mode_push_string_eof(parser);
91339151
}
@@ -9138,9 +9156,9 @@ parser_lex(pm_parser_t *parser) {
91389156
parser->current.end++;
91399157

91409158
if (parser->current.end < parser->end) {
9141-
lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9159+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9160+
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91429161
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
9143-
parser->current.end++;
91449162
} else {
91459163
lex_mode_push_string_eof(parser);
91469164
}
@@ -9151,7 +9169,7 @@ parser_lex(pm_parser_t *parser) {
91519169
parser->current.end++;
91529170

91539171
if (parser->current.end < parser->end) {
9154-
lex_mode_push_list(parser, false, *parser->current.end++);
9172+
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
91559173
} else {
91569174
lex_mode_push_list_eof(parser);
91579175
}
@@ -9162,7 +9180,7 @@ parser_lex(pm_parser_t *parser) {
91629180
parser->current.end++;
91639181

91649182
if (parser->current.end < parser->end) {
9165-
lex_mode_push_list(parser, true, *parser->current.end++);
9183+
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
91669184
} else {
91679185
lex_mode_push_list_eof(parser);
91689186
}
@@ -9173,8 +9191,8 @@ parser_lex(pm_parser_t *parser) {
91739191
parser->current.end++;
91749192

91759193
if (parser->current.end < parser->end) {
9176-
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
9177-
parser->current.end++;
9194+
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
9195+
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
91789196
} else {
91799197
lex_mode_push_string_eof(parser);
91809198
}

src/util/pm_newline_list.c

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,18 +45,6 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
4545
return true;
4646
}
4747

48-
/**
49-
* Conditionally append a new offset to the newline list, if the value passed in
50-
* is a newline.
51-
*/
52-
bool
53-
pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
54-
if (*cursor != '\n') {
55-
return true;
56-
}
57-
return pm_newline_list_append(list, cursor);
58-
}
59-
6048
/**
6149
* Returns the line and column of the given offset. If the offset is not in the
6250
* list, the line and column of the closest offset less than the given offset
test/prism/fixtures/spanning_heredoc_newlines.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<<A+%
2+
A
3+
4+
5+
<<A+%r
6+
A
7+
8+
9+
<<A+%q
10+
A
11+
12+
13+
<<A+%Q
14+
A
15+
16+
17+
<<A+%s
18+
A
19+
20+
21+
<<A+%x
22+
A
23+

test/prism/parse_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def test_parse_lex_file
211211
end
212212

213213
Dir["*.txt", base: base].each do |relative|
214-
next if relative == "newline_terminated.txt"
214+
next if relative == "newline_terminated.txt" || relative == "spanning_heredoc_newlines.txt"
215215

216216
# We test every snippet (separated by \n\n) in isolation
217217
# to ensure the parser does not try to read bytes further than the end of each snippet

test/prism/snapshots/spanning_heredoc_newlines.txt

Lines changed: 155 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)