[ruby/prism] Fix parser translator ast for heredoc with written newlines

Heredocs that contain "\\n" don't start a new string node. https://github.com/ruby/prism/commit/61d9d3a15e
2025-01-03 19:14:15 +01:00 · 2025-01-03 19:14:15 +01:00 · c037f5a28c
commit c037f5a28c
parent 179e2cfa91
2 changed files with 27 additions and 7 deletions
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@ -2079,27 +2079,49 @@ module Prism
                escaped_lengths = []
                normalized_lengths = []
                # Keeps track of where an unescaped line should start a new token. An unescaped
                # \n would otherwise be indistinguishable from the actual newline at the end of
                # the line. The parser gem only emits a new string node at "real" newlines;
                # line continuations don't start a new node either.
                do_next_tokens = []
                if node.opening.end_with?("'")
                  escaped.each do |line|
                    escaped_lengths << line.bytesize
                    normalized_lengths << chomped_bytesize(line)
                    do_next_tokens << true
                  end
                else
                  escaped
-                    .chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
+                    .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
                    .each do |lines|
                      escaped_lengths << lines.sum(&:bytesize)
                      normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
                      unescaped_lines_count = lines.sum do |line|
                        line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? || false }
                      end
                      do_next_tokens.concat(Array.new(unescaped_lines_count + 1, false))
                      do_next_tokens[-1] = true
                    end
                end
                start_offset = part.location.start_offset
                current_line = +""
                current_normalized_length = 0
-                unescaped.map.with_index do |unescaped_line, index|
+                unescaped.filter_map.with_index do |unescaped_line, index|
-                  inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
+                  current_line << unescaped_line
-                  start_offset += escaped_lengths.fetch(index, 0)
+                  current_normalized_length += normalized_lengths.fetch(index, 0)
-                  inner_part
+
                  if do_next_tokens[index]
                    inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
                    start_offset += escaped_lengths.fetch(index, 0)
                    current_line = +""
                    current_normalized_length = 0
                    inner_part
                  else
                    nil
                  end
                end
              else
                [visit(part)]
--- a/test/prism/ruby/parser_test.rb
+++ b/test/prism/ruby/parser_test.rb
@ -65,14 +65,12 @@ module Prism
      "seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
      "seattlerb/heredoc_with_only_carriage_returns_windows.txt",
      "seattlerb/heredoc_with_only_carriage_returns.txt",
      "seattlerb/parse_line_heredoc_hardnewline.txt",
      "seattlerb/pctW_lineno.txt",
      "seattlerb/regexp_esc_C_slash.txt",
      "unparser/corpus/literal/literal.txt",
      "unparser/corpus/semantic/dstr.txt",
      "whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",
      "whitequark/parser_slash_slash_n_escaping_in_literals.txt",
      "whitequark/ruby_bug_11989.txt"
    ]
    # Not sure why these files are failing on JRuby, but skipping them for now.