[ruby/yarp] fix: regexes and strings with escaped newline around a heredoc
Found via the fuzzer.
https://github.com/ruby/yarp/commit/501757135a
Co-authored-by: Kevin Newton <kddnewton@gmail.com>
This commit is contained in:
parent
bf3d48e182
commit
3525c460f9
13
test/yarp/fixtures/wrapping_heredoc.txt
Normal file
13
test/yarp/fixtures/wrapping_heredoc.txt
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# test regex, string, and lists that wrap a heredoc thanks to an escaped newline
|
||||||
|
|
||||||
|
# ripper incorrectly creates a "b\nc" string instead of two separate string tokens
|
||||||
|
pp <<-A.gsub(/b\
|
||||||
|
a
|
||||||
|
A
|
||||||
|
c/, "")
|
||||||
|
|
||||||
|
# ripper incorrectly creates a "e\nf" string instead of two separate string tokens
|
||||||
|
pp <<-A + "e\
|
||||||
|
d
|
||||||
|
A
|
||||||
|
f"
|
@ -112,6 +112,10 @@ class ParseTest < Test::Unit::TestCase
|
|||||||
# Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
|
# Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
|
||||||
return if relative == "seattlerb/heredoc_nested.txt"
|
return if relative == "seattlerb/heredoc_nested.txt"
|
||||||
|
|
||||||
|
# Ripper seems to have a bug where the regex portions before and after the heredoc are combined
|
||||||
|
# into a single token.
|
||||||
|
return if relative == "wrapping_heredoc.txt"
|
||||||
|
|
||||||
# Finally, assert that we can lex the source and get the same tokens as
|
# Finally, assert that we can lex the source and get the same tokens as
|
||||||
# Ripper.
|
# Ripper.
|
||||||
lex_result = YARP.lex_compat(source)
|
lex_result = YARP.lex_compat(source)
|
||||||
|
80
test/yarp/snapshots/wrapping_heredoc.txt
Normal file
80
test/yarp/snapshots/wrapping_heredoc.txt
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
ProgramNode(165...298)(
|
||||||
|
[],
|
||||||
|
StatementsNode(165...298)(
|
||||||
|
[CallNode(165...193)(
|
||||||
|
nil,
|
||||||
|
nil,
|
||||||
|
(165...167),
|
||||||
|
nil,
|
||||||
|
ArgumentsNode(168...193)(
|
||||||
|
[CallNode(168...193)(
|
||||||
|
InterpolatedStringNode(168...172)(
|
||||||
|
(168...172),
|
||||||
|
[StringNode(182...184)(nil, (182...184), nil, "a\n")],
|
||||||
|
(184...186)
|
||||||
|
),
|
||||||
|
(172...173),
|
||||||
|
(173...177),
|
||||||
|
(177...178),
|
||||||
|
ArgumentsNode(178...192)(
|
||||||
|
[InterpolatedRegularExpressionNode(178...188)(
|
||||||
|
(178...179),
|
||||||
|
[StringNode(179...182)(nil, (179...182), nil, "b"),
|
||||||
|
StringNode(186...187)(nil, (186...187), nil, "c")],
|
||||||
|
(187...188),
|
||||||
|
0
|
||||||
|
),
|
||||||
|
StringNode(190...192)(
|
||||||
|
(190...191),
|
||||||
|
(191...191),
|
||||||
|
(191...192),
|
||||||
|
""
|
||||||
|
)]
|
||||||
|
),
|
||||||
|
(192...193),
|
||||||
|
nil,
|
||||||
|
0,
|
||||||
|
"gsub"
|
||||||
|
)]
|
||||||
|
),
|
||||||
|
nil,
|
||||||
|
nil,
|
||||||
|
0,
|
||||||
|
"pp"
|
||||||
|
),
|
||||||
|
CallNode(278...298)(
|
||||||
|
nil,
|
||||||
|
nil,
|
||||||
|
(278...280),
|
||||||
|
nil,
|
||||||
|
ArgumentsNode(281...298)(
|
||||||
|
[CallNode(281...298)(
|
||||||
|
InterpolatedStringNode(281...285)(
|
||||||
|
(281...285),
|
||||||
|
[StringNode(292...294)(nil, (292...294), nil, "d\n")],
|
||||||
|
(294...296)
|
||||||
|
),
|
||||||
|
nil,
|
||||||
|
(286...287),
|
||||||
|
nil,
|
||||||
|
ArgumentsNode(288...298)(
|
||||||
|
[InterpolatedStringNode(288...298)(
|
||||||
|
(288...289),
|
||||||
|
[StringNode(289...292)(nil, (289...292), nil, "e"),
|
||||||
|
StringNode(296...297)(nil, (296...297), nil, "f")],
|
||||||
|
(297...298)
|
||||||
|
)]
|
||||||
|
),
|
||||||
|
nil,
|
||||||
|
nil,
|
||||||
|
0,
|
||||||
|
"+"
|
||||||
|
)]
|
||||||
|
),
|
||||||
|
nil,
|
||||||
|
nil,
|
||||||
|
0,
|
||||||
|
"pp"
|
||||||
|
)]
|
||||||
|
)
|
||||||
|
)
|
38
yarp/yarp.c
38
yarp/yarp.c
@ -6614,7 +6614,13 @@ parser_lex(yp_parser_t *parser) {
|
|||||||
|
|
||||||
case YP_LEX_REGEXP: {
|
case YP_LEX_REGEXP: {
|
||||||
// First, we'll set the start of this token to be the current end.
|
// First, we'll set the start of this token to be the current end.
|
||||||
|
if (parser->next_start == NULL) {
|
||||||
parser->current.start = parser->current.end;
|
parser->current.start = parser->current.end;
|
||||||
|
} else {
|
||||||
|
parser->current.start = parser->next_start;
|
||||||
|
parser->current.end = parser->next_start;
|
||||||
|
parser->next_start = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
// We'll check if we're at the end of the file. If we are, then we need to
|
// We'll check if we're at the end of the file. If we are, then we need to
|
||||||
// return the EOF token.
|
// return the EOF token.
|
||||||
@ -6693,9 +6699,19 @@ parser_lex(yp_parser_t *parser) {
|
|||||||
if (*breakpoint == '\\') {
|
if (*breakpoint == '\\') {
|
||||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
|
||||||
|
|
||||||
// If the result is an escaped newline, then we need to
|
// If the result is an escaped newline ...
|
||||||
// track that newline.
|
if (*(breakpoint + difference - 1) == '\n') {
|
||||||
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
|
if (parser->heredoc_end) {
|
||||||
|
// ... if we are on the same line as a heredoc, flush the heredoc and
|
||||||
|
// continue parsing after heredoc_end.
|
||||||
|
parser->current.end = breakpoint + difference;
|
||||||
|
parser_flush_heredoc_end(parser);
|
||||||
|
LEX(YP_TOKEN_STRING_CONTENT);
|
||||||
|
} else {
|
||||||
|
// ... else track the newline.
|
||||||
|
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||||
continue;
|
continue;
|
||||||
@ -6833,9 +6849,19 @@ parser_lex(yp_parser_t *parser) {
|
|||||||
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
||||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||||
|
|
||||||
// If the result is an escaped newline, then we need to
|
// If the result is an escaped newline ...
|
||||||
// track that newline.
|
if (*(breakpoint + difference - 1) == '\n') {
|
||||||
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
|
if (parser->heredoc_end) {
|
||||||
|
// ... if we are on the same line as a heredoc, flush the heredoc and
|
||||||
|
// continue parsing after heredoc_end.
|
||||||
|
parser->current.end = breakpoint + difference;
|
||||||
|
parser_flush_heredoc_end(parser);
|
||||||
|
LEX(YP_TOKEN_STRING_CONTENT);
|
||||||
|
} else {
|
||||||
|
// ... else track the newline.
|
||||||
|
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||||
break;
|
break;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user