From 77e971b6ec93810842a8ef320e412174267dab6d Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 27 Aug 2023 16:32:19 -0400 Subject: [PATCH] [ruby/yarp] fix: %w list spanning a heredoc Two fixes were necessary: - ensure we are handling newlines correctly - accept two consecutive string tokens without a separator https://github.com/ruby/yarp/commit/4e707937cb Co-authored-by: Kevin Newton --- test/yarp/fixtures/spanning_heredoc.txt | 6 ++++++ test/yarp/parse_test.rb | 2 +- test/yarp/snapshots/spanning_heredoc.txt | 27 ++++++++++++++++++++++-- yarp/yarp.c | 25 ++++++++++++++-------- 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/test/yarp/fixtures/spanning_heredoc.txt b/test/yarp/fixtures/spanning_heredoc.txt index 5040840e76..1b17edac5d 100644 --- a/test/yarp/fixtures/spanning_heredoc.txt +++ b/test/yarp/fixtures/spanning_heredoc.txt @@ -23,3 +23,9 @@ pp <<-A, %Q[h\ g A h] + +# ripper can't parse this successfully, though ruby runs it correctly +pp <<-A, %w[j\ +i +A +j] diff --git a/test/yarp/parse_test.rb b/test/yarp/parse_test.rb index 1eb0033208..b288d597b2 100644 --- a/test/yarp/parse_test.rb +++ b/test/yarp/parse_test.rb @@ -77,7 +77,7 @@ class ParseTest < Test::Unit::TestCase # into a single token. See https://bugs.ruby-lang.org/issues/19838. # # Additionally, Ripper cannot parse the %w[] fixture in this file, so set ripper_should_parse to false. - ripper_should_match = false if relative == "spanning_heredoc.txt" + ripper_should_parse = false if relative == "spanning_heredoc.txt" define_method "test_filepath_#{relative}" do # First, read the source from the filepath. Use binmode to avoid converting CRLF on Windows, diff --git a/test/yarp/snapshots/spanning_heredoc.txt b/test/yarp/snapshots/spanning_heredoc.txt index 244ced125b..f948e1e9d3 100644 --- a/test/yarp/snapshots/spanning_heredoc.txt +++ b/test/yarp/snapshots/spanning_heredoc.txt @@ -1,6 +1,6 @@ -ProgramNode(164...448)( +ProgramNode(164...541)( [], - StatementsNode(164...448)( + StatementsNode(164...541)( [CallNode(164...192)( nil, nil, @@ -110,6 +110,29 @@ ProgramNode(164...448)( nil, 0, "pp" + ), + CallNode(520...541)( + nil, + nil, + (520...522), + nil, + ArgumentsNode(523...541)( + [InterpolatedStringNode(523...527)( + (523...527), + [StringNode(535...537)(nil, (535...537), nil, "i\n")], + (537...539) + ), + ArrayNode(529...541)( + [StringNode(532...535)(nil, (532...535), nil, "j\\\n"), + StringNode(539...540)(nil, (539...540), nil, "j")], + (529...532), + (540...541) + )] + ), + nil, + nil, + 0, + "pp" )] ) ) diff --git a/yarp/yarp.c b/yarp/yarp.c index 129a176dec..a4b1c9be3c 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -6942,9 +6942,19 @@ parser_lex(yp_parser_t *parser) { yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL; size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false); - // If the result is an escaped newline, then we need to - // track that newline. - yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1); + // If the result is an escaped newline ... + if (*(breakpoint + difference - 1) == '\n') { + if (parser->heredoc_end) { + // ... if we are on the same line as a heredoc, flush the heredoc and + // continue parsing after heredoc_end. + parser->current.end = breakpoint + difference; + parser_flush_heredoc_end(parser); + LEX(YP_TOKEN_STRING_CONTENT); + } else { + // ... else track the newline. + yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); + } + } breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); continue; @@ -12098,12 +12108,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { accept(parser, YP_TOKEN_WORDS_SEP); while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { - if (yp_array_node_size(array) == 0) { - accept(parser, YP_TOKEN_WORDS_SEP); - } else { - expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the strings in a `%w` list."); - if (match_type_p(parser, YP_TOKEN_STRING_END)) break; - } + accept(parser, YP_TOKEN_WORDS_SEP); + if (match_type_p(parser, YP_TOKEN_STRING_END)) break; + expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%w` list."); yp_token_t opening = not_provided(parser);