[ruby/yarp] fix: %w list spanning a heredoc

Two fixes were necessary:

- ensure we are handling newlines correctly
- accept two consecutive string tokens without a separator

https://github.com/ruby/yarp/commit/4e707937cb

Co-authored-by: Kevin Newton <kddnewton@gmail.com>
2023-08-27 16:32:19 -04:00 · 2023-08-27 16:32:19 -04:00 · 77e971b6ec
commit 77e971b6ec
parent 8926fd20f6
4 changed files with 48 additions and 12 deletions
--- a/test/yarp/fixtures/spanning_heredoc.txt
+++ b/test/yarp/fixtures/spanning_heredoc.txt
@ -23,3 +23,9 @@ pp <<-A, %Q[h\
 g
 A
 h]
+
+# ripper can't parse this successfully, though ruby runs it correctly
+pp <<-A, %w[j\
+i
+A
+j]
--- a/test/yarp/parse_test.rb
+++ b/test/yarp/parse_test.rb
@ -77,7 +77,7 @@ class ParseTest < Test::Unit::TestCase
    # into a single token. See https://bugs.ruby-lang.org/issues/19838.
    #
    # Additionally, Ripper cannot parse the %w[] fixture in this file, so set ripper_should_parse to false.
-    ripper_should_match = false if relative == "spanning_heredoc.txt"
+    ripper_should_parse = false if relative == "spanning_heredoc.txt"

    define_method "test_filepath_#{relative}" do
      # First, read the source from the filepath. Use binmode to avoid converting CRLF on Windows,
--- a/test/yarp/snapshots/spanning_heredoc.txt
+++ b/test/yarp/snapshots/spanning_heredoc.txt
@ -1,6 +1,6 @@
-ProgramNode(164...448)(
+ProgramNode(164...541)(
  [],
-  StatementsNode(164...448)(
+  StatementsNode(164...541)(
    [CallNode(164...192)(
       nil,
       nil,
@ -110,6 +110,29 @@ ProgramNode(164...448)(
       nil,
       0,
       "pp"
+     ),
+     CallNode(520...541)(
+       nil,
+       nil,
+       (520...522),
+       nil,
+       ArgumentsNode(523...541)(
+         [InterpolatedStringNode(523...527)(
+            (523...527),
+            [StringNode(535...537)(nil, (535...537), nil, "i\n")],
+            (537...539)
+          ),
+          ArrayNode(529...541)(
+            [StringNode(532...535)(nil, (532...535), nil, "j\\\n"),
+             StringNode(539...540)(nil, (539...540), nil, "j")],
+            (529...532),
+            (540...541)
+          )]
+       ),
+       nil,
+       nil,
+       0,
+       "pp"
     )]
  )
 )
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@ -6942,9 +6942,19 @@ parser_lex(yp_parser_t *parser) {
                    yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
                    size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);

-                    // If the result is an escaped newline, then we need to
-                    // track that newline.
-                    yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
+                    // If the result is an escaped newline ...
+                    if (*(breakpoint + difference - 1) == '\n') {
+                        if (parser->heredoc_end) {
+                            // ... if we are on the same line as a heredoc, flush the heredoc and
+                            // continue parsing after heredoc_end.
+                            parser->current.end = breakpoint + difference;
+                            parser_flush_heredoc_end(parser);
+                            LEX(YP_TOKEN_STRING_CONTENT);
+                        } else {
+                            // ... else track the newline.
+                            yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
+                        }
+                    }

                    breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                    continue;
@ -12098,12 +12108,9 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
            accept(parser, YP_TOKEN_WORDS_SEP);

            while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
-                if (yp_array_node_size(array) == 0) {
-                    accept(parser, YP_TOKEN_WORDS_SEP);
-                } else {
-                    expect(parser, YP_TOKEN_WORDS_SEP, "Expected a separator for the strings in a `%w` list.");
-                    if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
-                }
+                accept(parser, YP_TOKEN_WORDS_SEP);
+                if (match_type_p(parser, YP_TOKEN_STRING_END)) break;
+
                expect(parser, YP_TOKEN_STRING_CONTENT, "Expected a string in a `%w` list.");

                yp_token_t opening = not_provided(parser);