[ruby/yarp] fix: regexes and strings with escaped newline around a heredoc

Found via the fuzzer.

https://github.com/ruby/yarp/commit/501757135a

Co-authored-by: Kevin Newton <kddnewton@gmail.com>
This commit is contained in:
Mike Dalessio 2023-08-25 10:12:13 -04:00 committed by git
parent bf3d48e182
commit 3525c460f9
4 changed files with 130 additions and 7 deletions

View File

@ -0,0 +1,13 @@
# test regex, string, and lists that wrap a heredoc thanks to an escaped newline
# ripper incorrectly creates a "b\nc" string instead of two separate string tokens
pp <<-A.gsub(/b\
a
A
c/, "")
# ripper incorrectly creates a "e\nf" string instead of two separate string tokens
pp <<-A + "e\
d
A
f"

View File

@ -112,6 +112,10 @@ class ParseTest < Test::Unit::TestCase
# Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
return if relative == "seattlerb/heredoc_nested.txt"
# Ripper seems to have a bug where the regex (or string) portions before and
# after the heredoc are combined into a single token.
return if relative == "wrapping_heredoc.txt"
# Finally, assert that we can lex the source and get the same tokens as
# Ripper.
lex_result = YARP.lex_compat(source)

View File

@ -0,0 +1,80 @@
ProgramNode(165...298)(
[],
StatementsNode(165...298)(
[CallNode(165...193)(
nil,
nil,
(165...167),
nil,
ArgumentsNode(168...193)(
[CallNode(168...193)(
InterpolatedStringNode(168...172)(
(168...172),
[StringNode(182...184)(nil, (182...184), nil, "a\n")],
(184...186)
),
(172...173),
(173...177),
(177...178),
ArgumentsNode(178...192)(
[InterpolatedRegularExpressionNode(178...188)(
(178...179),
[StringNode(179...182)(nil, (179...182), nil, "b"),
StringNode(186...187)(nil, (186...187), nil, "c")],
(187...188),
0
),
StringNode(190...192)(
(190...191),
(191...191),
(191...192),
""
)]
),
(192...193),
nil,
0,
"gsub"
)]
),
nil,
nil,
0,
"pp"
),
CallNode(278...298)(
nil,
nil,
(278...280),
nil,
ArgumentsNode(281...298)(
[CallNode(281...298)(
InterpolatedStringNode(281...285)(
(281...285),
[StringNode(292...294)(nil, (292...294), nil, "d\n")],
(294...296)
),
nil,
(286...287),
nil,
ArgumentsNode(288...298)(
[InterpolatedStringNode(288...298)(
(288...289),
[StringNode(289...292)(nil, (289...292), nil, "e"),
StringNode(296...297)(nil, (296...297), nil, "f")],
(297...298)
)]
),
nil,
nil,
0,
"+"
)]
),
nil,
nil,
0,
"pp"
)]
)
)

View File

@ -6614,7 +6614,13 @@ parser_lex(yp_parser_t *parser) {
case YP_LEX_REGEXP: {
// First, we'll set the start of this token to be the current end.
if (parser->next_start == NULL) {
parser->current.start = parser->current.end;
} else {
parser->current.start = parser->next_start;
parser->current.end = parser->next_start;
parser->next_start = NULL;
}
// We'll check if we're at the end of the file. If we are, then we need to
// return the EOF token.
@ -6693,9 +6699,19 @@ parser_lex(yp_parser_t *parser) {
if (*breakpoint == '\\') {
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
// If the result is an escaped newline, then we need to
// track that newline.
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
// If the result is an escaped newline ...
if (*(breakpoint + difference - 1) == '\n') {
if (parser->heredoc_end) {
// ... if we are on the same line as a heredoc, flush the heredoc and
// continue parsing after heredoc_end.
parser->current.end = breakpoint + difference;
parser_flush_heredoc_end(parser);
LEX(YP_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
}
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
continue;
@ -6833,9 +6849,19 @@ parser_lex(yp_parser_t *parser) {
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
// If the result is an escaped newline, then we need to
// track that newline.
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
// If the result is an escaped newline ...
if (*(breakpoint + difference - 1) == '\n') {
if (parser->heredoc_end) {
// ... if we are on the same line as a heredoc, flush the heredoc and
// continue parsing after heredoc_end.
parser->current.end = breakpoint + difference;
parser_flush_heredoc_end(parser);
LEX(YP_TOKEN_STRING_CONTENT);
} else {
// ... else track the newline.
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
}
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break;