[ruby/prism] Fix assertion on spanning heredocs
https://github.com/ruby/prism/commit/e190308845
This commit is contained in:
parent
02d8bad6e1
commit
88d7838445
@ -8093,6 +8093,34 @@ pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor,
|
||||
return whitespace;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lex past the delimiter of a percent literal. Handle newlines and heredocs
|
||||
* appropriately.
|
||||
*/
|
||||
static uint8_t
|
||||
pm_lex_percent_delimiter(pm_parser_t *parser) {
|
||||
size_t eol_length = match_eol(parser);
|
||||
|
||||
if (eol_length) {
|
||||
if (parser->heredoc_end) {
|
||||
// If we have already lexed a heredoc, then the newline has already
|
||||
// been added to the list. In this case we want to just flush the
|
||||
// heredoc end.
|
||||
parser_flush_heredoc_end(parser);
|
||||
} else {
|
||||
// Otherwise, we'll add the newline to the list of newlines.
|
||||
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
||||
}
|
||||
|
||||
const uint8_t delimiter = *parser->current.end;
|
||||
parser->current.end += eol_length;
|
||||
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
return *parser->current.end++;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a convenience macro that will set the current token type, call the
|
||||
* lex callback, and then return from the parser_lex function.
|
||||
@ -9049,15 +9077,8 @@ parser_lex(pm_parser_t *parser) {
|
||||
pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
|
||||
}
|
||||
|
||||
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
|
||||
|
||||
size_t eol_length = match_eol(parser);
|
||||
if (eol_length) {
|
||||
parser->current.end += eol_length;
|
||||
pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
||||
} else {
|
||||
parser->current.end++;
|
||||
}
|
||||
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
||||
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
LEX(PM_TOKEN_STRING_BEGIN);
|
||||
@ -9077,7 +9098,7 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_list(parser, false, *parser->current.end++);
|
||||
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
||||
} else {
|
||||
lex_mode_push_list_eof(parser);
|
||||
}
|
||||
@ -9088,7 +9109,7 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_list(parser, true, *parser->current.end++);
|
||||
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
||||
} else {
|
||||
lex_mode_push_list_eof(parser);
|
||||
}
|
||||
@ -9099,9 +9120,8 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
|
||||
pm_newline_list_check_append(&parser->newline_list, parser->current.end);
|
||||
parser->current.end++;
|
||||
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
||||
lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
||||
} else {
|
||||
lex_mode_push_regexp(parser, '\0', '\0');
|
||||
}
|
||||
@ -9112,9 +9132,8 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
|
||||
pm_newline_list_check_append(&parser->newline_list, parser->current.end);
|
||||
parser->current.end++;
|
||||
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
||||
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
||||
} else {
|
||||
lex_mode_push_string_eof(parser);
|
||||
}
|
||||
@ -9125,9 +9144,8 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
|
||||
pm_newline_list_check_append(&parser->newline_list, parser->current.end);
|
||||
parser->current.end++;
|
||||
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
||||
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
||||
} else {
|
||||
lex_mode_push_string_eof(parser);
|
||||
}
|
||||
@ -9138,9 +9156,9 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
|
||||
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
||||
lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
||||
lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
|
||||
parser->current.end++;
|
||||
} else {
|
||||
lex_mode_push_string_eof(parser);
|
||||
}
|
||||
@ -9151,7 +9169,7 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_list(parser, false, *parser->current.end++);
|
||||
lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
|
||||
} else {
|
||||
lex_mode_push_list_eof(parser);
|
||||
}
|
||||
@ -9162,7 +9180,7 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_list(parser, true, *parser->current.end++);
|
||||
lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
|
||||
} else {
|
||||
lex_mode_push_list_eof(parser);
|
||||
}
|
||||
@ -9173,8 +9191,8 @@ parser_lex(pm_parser_t *parser) {
|
||||
parser->current.end++;
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
|
||||
parser->current.end++;
|
||||
const uint8_t delimiter = pm_lex_percent_delimiter(parser);
|
||||
lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
|
||||
} else {
|
||||
lex_mode_push_string_eof(parser);
|
||||
}
|
||||
|
@ -45,18 +45,6 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Conditionally append a new offset to the newline list, if the value passed in
|
||||
* is a newline.
|
||||
*/
|
||||
bool
|
||||
pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
|
||||
if (*cursor != '\n') {
|
||||
return true;
|
||||
}
|
||||
return pm_newline_list_append(list, cursor);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the line and column of the given offset. If the offset is not in the
|
||||
* list, the line and column of the closest offset less than the given offset
|
||||
|
@ -72,17 +72,6 @@ bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t
|
||||
*/
|
||||
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
|
||||
|
||||
/**
|
||||
* Conditionally append a new offset to the newline list, if the value passed in
|
||||
* is a newline.
|
||||
*
|
||||
* @param list The list to append to.
|
||||
* @param cursor A pointer to the offset to append.
|
||||
* @return True if the reallocation of the offsets succeeds (if one was
|
||||
* necessary), otherwise false.
|
||||
*/
|
||||
bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor);
|
||||
|
||||
/**
|
||||
* Returns the line and column of the given offset. If the offset is not in the
|
||||
* list, the line and column of the closest offset less than the given offset
|
||||
|
23
test/prism/fixtures/spanning_heredoc_newlines.txt
Normal file
23
test/prism/fixtures/spanning_heredoc_newlines.txt
Normal file
@ -0,0 +1,23 @@
|
||||
<<A+%
|
||||
A
|
||||
|
||||
|
||||
<<A+%r
|
||||
A
|
||||
|
||||
|
||||
<<A+%q
|
||||
A
|
||||
|
||||
|
||||
<<A+%Q
|
||||
A
|
||||
|
||||
|
||||
<<A+%s
|
||||
A
|
||||
|
||||
|
||||
<<A+%x
|
||||
A
|
||||
|
@ -211,7 +211,7 @@ module Prism
|
||||
end
|
||||
|
||||
Dir["*.txt", base: base].each do |relative|
|
||||
next if relative == "newline_terminated.txt"
|
||||
next if relative == "newline_terminated.txt" || relative == "spanning_heredoc_newlines.txt"
|
||||
|
||||
# We test every snippet (separated by \n\n) in isolation
|
||||
# to ensure the parser does not try to read bytes further than the end of each snippet
|
||||
|
155
test/prism/snapshots/spanning_heredoc_newlines.txt
Normal file
155
test/prism/snapshots/spanning_heredoc_newlines.txt
Normal file
@ -0,0 +1,155 @@
|
||||
@ ProgramNode (location: (1,0)-(24,0))
|
||||
├── locals: []
|
||||
└── statements:
|
||||
@ StatementsNode (location: (1,0)-(24,0))
|
||||
└── body: (length: 6)
|
||||
├── @ CallNode (location: (1,0)-(4,0))
|
||||
│ ├── flags: ∅
|
||||
│ ├── receiver:
|
||||
│ │ @ StringNode (location: (1,0)-(1,3))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (1,0)-(1,3) = "<<A"
|
||||
│ │ ├── content_loc: (2,0)-(2,0) = ""
|
||||
│ │ ├── closing_loc: (2,0)-(3,0) = "A\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── call_operator_loc: ∅
|
||||
│ ├── name: :+
|
||||
│ ├── message_loc: (1,3)-(1,4) = "+"
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── arguments:
|
||||
│ │ @ ArgumentsNode (location: (1,4)-(4,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ └── arguments: (length: 1)
|
||||
│ │ └── @ StringNode (location: (1,4)-(4,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (1,4)-(2,0) = "%\n"
|
||||
│ │ ├── content_loc: (3,0)-(3,0) = ""
|
||||
│ │ ├── closing_loc: (3,0)-(4,0) = "\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── closing_loc: ∅
|
||||
│ └── block: ∅
|
||||
├── @ CallNode (location: (5,0)-(8,0))
|
||||
│ ├── flags: ∅
|
||||
│ ├── receiver:
|
||||
│ │ @ StringNode (location: (5,0)-(5,3))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (5,0)-(5,3) = "<<A"
|
||||
│ │ ├── content_loc: (6,0)-(6,0) = ""
|
||||
│ │ ├── closing_loc: (6,0)-(7,0) = "A\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── call_operator_loc: ∅
|
||||
│ ├── name: :+
|
||||
│ ├── message_loc: (5,3)-(5,4) = "+"
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── arguments:
|
||||
│ │ @ ArgumentsNode (location: (5,4)-(8,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ └── arguments: (length: 1)
|
||||
│ │ └── @ RegularExpressionNode (location: (5,4)-(8,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (5,4)-(6,0) = "%r\n"
|
||||
│ │ ├── content_loc: (6,0)-(6,0) = ""
|
||||
│ │ ├── closing_loc: (7,0)-(8,0) = "\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── closing_loc: ∅
|
||||
│ └── block: ∅
|
||||
├── @ CallNode (location: (9,0)-(12,0))
|
||||
│ ├── flags: ∅
|
||||
│ ├── receiver:
|
||||
│ │ @ StringNode (location: (9,0)-(9,3))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (9,0)-(9,3) = "<<A"
|
||||
│ │ ├── content_loc: (10,0)-(10,0) = ""
|
||||
│ │ ├── closing_loc: (10,0)-(11,0) = "A\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── call_operator_loc: ∅
|
||||
│ ├── name: :+
|
||||
│ ├── message_loc: (9,3)-(9,4) = "+"
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── arguments:
|
||||
│ │ @ ArgumentsNode (location: (9,4)-(12,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ └── arguments: (length: 1)
|
||||
│ │ └── @ StringNode (location: (9,4)-(12,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (9,4)-(10,0) = "%q\n"
|
||||
│ │ ├── content_loc: (11,0)-(11,0) = ""
|
||||
│ │ ├── closing_loc: (11,0)-(12,0) = "\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── closing_loc: ∅
|
||||
│ └── block: ∅
|
||||
├── @ CallNode (location: (13,0)-(16,0))
|
||||
│ ├── flags: ∅
|
||||
│ ├── receiver:
|
||||
│ │ @ StringNode (location: (13,0)-(13,3))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (13,0)-(13,3) = "<<A"
|
||||
│ │ ├── content_loc: (14,0)-(14,0) = ""
|
||||
│ │ ├── closing_loc: (14,0)-(15,0) = "A\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── call_operator_loc: ∅
|
||||
│ ├── name: :+
|
||||
│ ├── message_loc: (13,3)-(13,4) = "+"
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── arguments:
|
||||
│ │ @ ArgumentsNode (location: (13,4)-(16,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ └── arguments: (length: 1)
|
||||
│ │ └── @ StringNode (location: (13,4)-(16,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (13,4)-(14,0) = "%Q\n"
|
||||
│ │ ├── content_loc: (15,0)-(15,0) = ""
|
||||
│ │ ├── closing_loc: (15,0)-(16,0) = "\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── closing_loc: ∅
|
||||
│ └── block: ∅
|
||||
├── @ CallNode (location: (17,0)-(20,0))
|
||||
│ ├── flags: ∅
|
||||
│ ├── receiver:
|
||||
│ │ @ StringNode (location: (17,0)-(17,3))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (17,0)-(17,3) = "<<A"
|
||||
│ │ ├── content_loc: (18,0)-(18,0) = ""
|
||||
│ │ ├── closing_loc: (18,0)-(19,0) = "A\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── call_operator_loc: ∅
|
||||
│ ├── name: :+
|
||||
│ ├── message_loc: (17,3)-(17,4) = "+"
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── arguments:
|
||||
│ │ @ ArgumentsNode (location: (17,4)-(20,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ └── arguments: (length: 1)
|
||||
│ │ └── @ SymbolNode (location: (17,4)-(20,0))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (17,4)-(18,0) = "%s\n"
|
||||
│ │ ├── value_loc: (18,0)-(18,0) = ""
|
||||
│ │ ├── closing_loc: (19,0)-(20,0) = "\n"
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── closing_loc: ∅
|
||||
│ └── block: ∅
|
||||
└── @ CallNode (location: (21,0)-(24,0))
|
||||
├── flags: ∅
|
||||
├── receiver:
|
||||
│ @ StringNode (location: (21,0)-(21,3))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: (21,0)-(21,3) = "<<A"
|
||||
│ ├── content_loc: (22,0)-(22,0) = ""
|
||||
│ ├── closing_loc: (22,0)-(23,0) = "A\n"
|
||||
│ └── unescaped: ""
|
||||
├── call_operator_loc: ∅
|
||||
├── name: :+
|
||||
├── message_loc: (21,3)-(21,4) = "+"
|
||||
├── opening_loc: ∅
|
||||
├── arguments:
|
||||
│ @ ArgumentsNode (location: (21,4)-(24,0))
|
||||
│ ├── flags: ∅
|
||||
│ └── arguments: (length: 1)
|
||||
│ └── @ XStringNode (location: (21,4)-(24,0))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: (21,4)-(22,0) = "%x\n"
|
||||
│ ├── content_loc: (22,0)-(22,0) = ""
|
||||
│ ├── closing_loc: (23,0)-(24,0) = "\n"
|
||||
│ └── unescaped: ""
|
||||
├── closing_loc: ∅
|
||||
└── block: ∅
|
Loading…
x
Reference in New Issue
Block a user