[ruby/prism] Fix up heredoc location translation for parser

https://github.com/ruby/prism/commit/a4e164e22b
This commit is contained in:
Kevin Newton 2024-05-31 15:08:16 -04:00 committed by git
parent 63ea77916a
commit b5c8fb9a3f
2 changed files with 22 additions and 41 deletions

View File

@ -1076,36 +1076,7 @@ module Prism
# ^^^^^^^^^^^^ # ^^^^^^^^^^^^
def visit_interpolated_string_node(node) def visit_interpolated_string_node(node)
if node.heredoc? if node.heredoc?
children, closing = visit_heredoc(node) return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
opening = token(node.opening_loc)
start_offset = node.opening_loc.end_offset + 1
end_offset = node.parts.first.location.start_offset
# In the below case, the offsets should be the same:
#
# <<~HEREDOC
# a #{b}
# HEREDOC
#
# But in this case, the end_offset would be greater than the start_offset:
#
# <<~HEREDOC
# #{b}
# HEREDOC
#
# So we need to make sure the result node's heredoc range is correct, without updating the children
result = if start_offset < end_offset
# We need to add a padding string to ensure that the heredoc has correct range for its body
padding_string_node = builder.string_internal(["", srange_offsets(start_offset, end_offset)])
node_with_correct_location = builder.string_compose(opening, [padding_string_node, *children], closing)
# But the padding string should not be included in the final AST, so we need to update the result's children
node_with_correct_location.updated(:dstr, children)
else
builder.string_compose(opening, children, closing)
end
return result
end end
parts = if node.parts.one? { |part| part.type == :string_node } parts = if node.parts.one? { |part| part.type == :string_node }
@ -1149,8 +1120,7 @@ module Prism
# ^^^^^^^^^^^^ # ^^^^^^^^^^^^
def visit_interpolated_x_string_node(node) def visit_interpolated_x_string_node(node)
if node.heredoc? if node.heredoc?
children, closing = visit_heredoc(node) visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
builder.xstring_compose(token(node.opening_loc), children, closing)
else else
builder.xstring_compose( builder.xstring_compose(
token(node.opening_loc), token(node.opening_loc),
@ -1685,8 +1655,7 @@ module Prism
# ^^^^^ # ^^^^^
def visit_string_node(node) def visit_string_node(node)
if node.heredoc? if node.heredoc?
children, closing = visit_heredoc(node.to_interpolated) visit_heredoc(node.to_interpolated) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
builder.string_compose(token(node.opening_loc), children, closing)
elsif node.opening == "?" elsif node.opening == "?"
builder.character([node.unescaped, srange(node.location)]) builder.character([node.unescaped, srange(node.location)])
elsif node.opening&.start_with?("%") && node.unescaped.empty? elsif node.opening&.start_with?("%") && node.unescaped.empty?
@ -1890,8 +1859,7 @@ module Prism
# ^^^^^ # ^^^^^
def visit_x_string_node(node) def visit_x_string_node(node)
if node.heredoc? if node.heredoc?
children, closing = visit_heredoc(node.to_interpolated) visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
builder.xstring_compose(token(node.opening_loc), children, closing)
else else
parts = if node.unescaped.lines.one? parts = if node.unescaped.lines.one?
[builder.string_internal([node.unescaped, srange(node.content_loc)])] [builder.string_internal([node.unescaped, srange(node.content_loc)])]
@ -2055,6 +2023,17 @@ module Prism
# Visit a heredoc that can be either a string or an xstring. # Visit a heredoc that can be either a string or an xstring.
def visit_heredoc(node) def visit_heredoc(node)
children = Array.new children = Array.new
indented = false
# If this is a dedenting heredoc, then we need to insert the opening
# content into the children as well.
if node.opening.start_with?("<<~") && node.parts.length > 0 && !node.parts.first.is_a?(StringNode)
location = node.parts.first.location
location = location.copy(start_offset: location.start_offset - location.start_line_slice.bytesize)
children << builder.string_internal(token(location))
indented = true
end
node.parts.each do |part| node.parts.each do |part|
pushing = pushing =
if part.is_a?(StringNode) && part.unescaped.include?("\n") if part.is_a?(StringNode) && part.unescaped.include?("\n")
@ -2094,8 +2073,10 @@ module Prism
closing = node.closing closing = node.closing
closing_t = [closing.chomp, srange_offsets(node.closing_loc.start_offset, node.closing_loc.end_offset - (closing[/\s+$/]&.length || 0))] closing_t = [closing.chomp, srange_offsets(node.closing_loc.start_offset, node.closing_loc.end_offset - (closing[/\s+$/]&.length || 0))]
composed = yield children, closing_t
[children, closing_t] composed = composed.updated(nil, children[1..-1]) if indented
composed
end end
# Visit a numeric node and account for the optional sign. # Visit a numeric node and account for the optional sign.

View File

@ -46,7 +46,6 @@ module Prism
# skip them for now. # skip them for now.
skip_all = [ skip_all = [
"dash_heredocs.txt", "dash_heredocs.txt",
"dos_endings.txt",
"heredocs_with_ignored_newlines.txt", "heredocs_with_ignored_newlines.txt",
"regex.txt", "regex.txt",
"regex_char_width.txt", "regex_char_width.txt",
@ -60,7 +59,6 @@ module Prism
"seattlerb/block_paren_splat.txt", "seattlerb/block_paren_splat.txt",
"seattlerb/bug190.txt", "seattlerb/bug190.txt",
"seattlerb/heredoc_nested.txt", "seattlerb/heredoc_nested.txt",
"seattlerb/heredoc_squiggly_blank_line_plus_interpolation.txt",
"seattlerb/heredoc_with_carriage_return_escapes_windows.txt", "seattlerb/heredoc_with_carriage_return_escapes_windows.txt",
"seattlerb/heredoc_with_carriage_return_escapes.txt", "seattlerb/heredoc_with_carriage_return_escapes.txt",
"seattlerb/heredoc_with_extra_carriage_returns_windows.txt", "seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
@ -71,9 +69,7 @@ module Prism
"seattlerb/pctW_lineno.txt", "seattlerb/pctW_lineno.txt",
"seattlerb/regexp_esc_C_slash.txt", "seattlerb/regexp_esc_C_slash.txt",
"seattlerb/TestRubyParserShared.txt", "seattlerb/TestRubyParserShared.txt",
"unparser/corpus/literal/assignment.txt",
"unparser/corpus/literal/block.txt", "unparser/corpus/literal/block.txt",
"unparser/corpus/literal/dstr.txt",
"unparser/corpus/literal/literal.txt", "unparser/corpus/literal/literal.txt",
"unparser/corpus/literal/pattern.txt", "unparser/corpus/literal/pattern.txt",
"unparser/corpus/semantic/dstr.txt", "unparser/corpus/semantic/dstr.txt",
@ -95,6 +91,7 @@ module Prism
# output expected by the parser gem, so we'll skip them for now. # output expected by the parser gem, so we'll skip them for now.
skip_tokens = [ skip_tokens = [
"comments.txt", "comments.txt",
"dos_endings.txt",
"embdoc_no_newline_at_end.txt", "embdoc_no_newline_at_end.txt",
"heredoc_with_comment.txt", "heredoc_with_comment.txt",
"indented_file_end.txt", "indented_file_end.txt",
@ -113,6 +110,7 @@ module Prism
"seattlerb/heredoc__backslash_dos_format.txt", "seattlerb/heredoc__backslash_dos_format.txt",
"seattlerb/heredoc_backslash_nl.txt", "seattlerb/heredoc_backslash_nl.txt",
"seattlerb/heredoc_comma_arg.txt", "seattlerb/heredoc_comma_arg.txt",
"seattlerb/heredoc_squiggly_blank_line_plus_interpolation.txt",
"seattlerb/heredoc_squiggly_blank_lines.txt", "seattlerb/heredoc_squiggly_blank_lines.txt",
"seattlerb/heredoc_squiggly_interp.txt", "seattlerb/heredoc_squiggly_interp.txt",
"seattlerb/heredoc_squiggly_tabs_extra.txt", "seattlerb/heredoc_squiggly_tabs_extra.txt",
@ -151,6 +149,8 @@ module Prism
"seattlerb/str_single_newline.txt", "seattlerb/str_single_newline.txt",
"seattlerb/symbol_empty.txt", "seattlerb/symbol_empty.txt",
"seattlerb/symbols_empty_space.txt", "seattlerb/symbols_empty_space.txt",
"unparser/corpus/literal/assignment.txt",
"unparser/corpus/literal/dstr.txt",
"unparser/corpus/semantic/opasgn.txt", "unparser/corpus/semantic/opasgn.txt",
"whitequark/args.txt", "whitequark/args.txt",
"whitequark/beginless_erange_after_newline.txt", "whitequark/beginless_erange_after_newline.txt",