Fix up merge conflicts for prism sync

This commit is contained in:
Kevin Newton 2025-03-18 13:02:56 -04:00
parent 33aaa069a4
commit b003d40194
3 changed files with 14 additions and 301 deletions

View File

@ -1100,7 +1100,7 @@ module Prism
def visit_interpolated_regular_expression_node(node)
builder.regexp_compose(
token(node.opening_loc),
visit_all(node.parts),
string_nodes_from_interpolation(node, node.opening),
[node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
)
@ -1117,45 +1117,6 @@ module Prism
return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
end
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
parts = if node.parts.one? { |part| part.type == :string_node }
node.parts.flat_map do |node|
if node.type == :string_node && node.unescaped.lines.count >= 2
start_offset = node.content_loc.start_offset
node.unescaped.lines.map do |line|
end_offset = start_offset + line.bytesize
offsets = srange_offsets(start_offset, end_offset)
start_offset = end_offset
builder.string_internal([line, offsets])
end
else
visit(node)
end
=======
parts = node.parts.flat_map do |part|
# When the content of a string node is split across multiple lines, the
# parser gem creates individual string nodes for each line the content is part of.
if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, node.opening)
else
visit(part)
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
end
else
visit_all(node.parts)
end
>>>>>>> a651126458 (Fix an incompatibility with the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
builder.string_compose(
token(node.opening_loc),
string_nodes_from_interpolation(node, node.opening),
@ -1739,23 +1700,7 @@ module Prism
if node.content.include?("\n")
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
else
<<<<<<< HEAD
<<<<<<< HEAD
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
=======
start_offset = node.content_loc.start_offset
[content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
end_offset = start_offset + content_line.bytesize
offsets = srange_offsets(start_offset, end_offset)
start_offset = end_offset
builder.string_internal([unescaped_line, offsets])
end
>>>>>>> a651126458 (Fix an incompatibility with the parser translator)
=======
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
end
builder.string_compose(
@ -1799,10 +1744,6 @@ module Prism
builder.symbol([node.unescaped, srange(node.location)])
end
else
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
parts =
if node.value == ""
[]
@ -1810,22 +1751,6 @@ module Prism
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
else
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
<<<<<<< HEAD
=======
parts = if node.value.lines.one?
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
else
start_offset = node.value_loc.start_offset
node.value.lines.map do |line|
end_offset = start_offset + line.bytesize
offsets = srange_offsets(start_offset, end_offset)
start_offset = end_offset
builder.string_internal([line, offsets])
>>>>>>> a651126458 (Fix an incompatibility with the parser translator)
=======
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
end
builder.symbol_compose(
@ -1964,23 +1889,7 @@ module Prism
elsif node.content.include?("\n")
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
else
<<<<<<< HEAD
<<<<<<< HEAD
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
=======
start_offset = node.content_loc.start_offset
node.unescaped.lines.map do |line|
end_offset = start_offset + line.bytesize
offsets = srange_offsets(start_offset, end_offset)
start_offset = end_offset
builder.string_internal([line, offsets])
end
>>>>>>> a651126458 (Fix an incompatibility with the parser translator)
=======
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
end
builder.xstring_compose(
@ -2219,31 +2128,14 @@ module Prism
def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
unescaped = unescaped.lines
escaped = escaped.lines
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
=======
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
=======
percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
regex = opening == "/" || opening&.start_with?("%r")
# Non-interpolating strings
if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
current_length = 0
current_line = +""
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
escaped.filter_map.with_index do |escaped_line, index|
unescaped_line = unescaped.fetch(index, "")
current_length += escaped_line.bytesize
@ -2258,19 +2150,6 @@ module Prism
current_line = +""
current_length = 0
s
<<<<<<< HEAD
<<<<<<< HEAD
=======
if opening&.end_with?("'")
escaped.each do |line|
escaped_lengths << line.bytesize
normalized_lengths << chomped_bytesize(line)
do_next_tokens << true
>>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
end
else
escaped_lengths = []
@ -2285,11 +2164,18 @@ module Prism
.chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
.each do |lines|
escaped_lengths << lines.sum(&:bytesize)
unescaped_lines_count = lines.sum do |line|
count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
count -= 1 if !line.end_with?("\n") && count > 0
count
end
unescaped_lines_count =
if regex
0 # Will always be preserved as is
else
lines.sum do |line|
count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
count -= 1 if !line.end_with?("\n") && count > 0
count
end
end
extra = 1
extra = lines.count if percent_array # Account for line continuations in percent arrays

View File

@ -10,15 +10,7 @@ module Prism
# format for the parser gem.
class Lexer
# These tokens are always skipped.
# Stored as a Set so that membership checks do not scan an Array.
TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
private_constant :TYPES_ALWAYS_SKIP
# The direct translating of types between the two lexers.
@ -203,42 +195,18 @@ module Prism
#
# NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
# instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
# Parser-gem token types that belong to lambda syntax, kept as a Set for
# fast inclusion checks.
LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
# The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
# The following token types are listed as those classified as `tLPAREN`.
LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
  :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
  :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
])
# Types of tokens that are allowed to continue a method call with comments in-between.
# For these, the parser gem doesn't emit a newline token after the last comment.
COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
private_constant :COMMENT_CONTINUATION_TYPES
# Heredocs are complex and require us to keep track of a bit of info to refer to later
@ -435,22 +403,6 @@ module Prism
end
when :tSTRING_CONTENT
is_percent_array = percent_array?(quote_stack.last)
<<<<<<< HEAD
=======
if (lines = token.value.lines).one?
<<<<<<< HEAD
<<<<<<< HEAD
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
# The parser gem only removes indentation when the heredoc is not nested
not_nested = heredoc_stack.size == 1
if is_percent_array
value = percent_array_unescape(value)
elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
value = trim_heredoc_whitespace(value, current_heredoc)
end
>>>>>>> bd3dd2b62a (Fix parser translator tokens for %-arrays with whitespace escapes)
if (lines = token.value.lines).one?
# Prism usually emits a single token for strings with line continuations.
@ -470,45 +422,6 @@ module Prism
value = trim_heredoc_whitespace(token.value, current_heredoc)
end
<<<<<<< HEAD
=======
# Prism usually emits a single token for strings with line continuations.
# For squiggly heredocs they are not joined so we do that manually here.
current_string = +""
current_length = 0
start_offset = token.location.start_offset
while token.type == :STRING_CONTENT
current_length += token.value.bytesize
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
# The parser gem only removes indentation when the heredoc is not nested
not_nested = heredoc_stack.size == 1
if is_percent_array
value = percent_array_unescape(token.value)
elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
value = trim_heredoc_whitespace(token.value, current_heredoc)
end
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
# Prism usually emits a single token for strings with line continuations.
# For squiggly heredocs they are not joined so we do that manually here.
current_string = +""
current_length = 0
start_offset = token.location.start_offset
while token.type == :STRING_CONTENT
current_length += token.value.bytesize
# Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
# The parser gem only removes indentation when the heredoc is not nested
not_nested = heredoc_stack.size == 1
if is_percent_array
value = percent_array_unescape(token.value)
elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
value = trim_heredoc_whitespace(token.value, current_heredoc)
end
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
current_string << unescape_string(value, quote_stack.last)
if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last)
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
@ -553,11 +466,6 @@ module Prism
current_line = +""
adjustment = 0
end
=======
end_offset = start_offset + adjusted_line.bytesize + adjustment
tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
start_offset = end_offset
>>>>>>> a651126458 (Fix an incompatibility with the parser translator)
end
end
next
@ -786,44 +694,7 @@ module Prism
while (skipped = scanner.skip_until(/\\/))
# Append what was just skipped over, excluding the found backslash.
result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))
<<<<<<< HEAD
<<<<<<< HEAD
escape_read(result, scanner, false, false)
=======
if scanner.peek(1) == "\n"
# Line continuation
scanner.pos += 1
elsif (replacement = ESCAPES[scanner.peek(1)])
# Simple single-character escape sequences like \n
result.append_as_bytes(replacement)
scanner.pos += 1
elsif (octal = scanner.check(/[0-7]{1,3}/))
# \nnn
result.append_as_bytes(octal.to_i(8).chr)
scanner.pos += octal.bytesize
elsif (hex = scanner.check(/x([0-9a-fA-F]{1,2})/))
# \xnn
result.append_as_bytes(hex[1..].to_i(16).chr)
scanner.pos += hex.bytesize
elsif (unicode = scanner.check(/u([0-9a-fA-F]{4})/))
# \unnnn
result.append_as_bytes(unicode[1..].hex.chr(Encoding::UTF_8))
scanner.pos += unicode.bytesize
elsif scanner.peek(3) == "u{}"
# https://github.com/whitequark/parser/issues/856
scanner.pos += 3
elsif (unicode_parts = scanner.check(/u{.*}/))
# \u{nnnn ...}
unicode_parts[2..-2].split.each do |unicode|
result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
end
scanner.pos += unicode_parts.bytesize
end
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
escape_read(result, scanner, false, false)
>>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
end
# Add remaining chars
@ -835,13 +706,6 @@ module Prism
end
end
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
# Certain strings are merged into a single string token.
def simplify_string?(value, quote)
case quote
@ -859,24 +723,11 @@ module Prism
end
end
<<<<<<< HEAD
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
# Escape a byte value, given the control and meta flags.
#
# value   - Integer value of the character being escaped.
# control - when truthy, the value is masked with 0x9f (clears bits 0x20 and 0x40).
# meta    - when truthy, bit 0x80 is set on the value.
#
# Returns the transformed Integer. The value is deliberately NOT converted
# with Integer#chr: without an encoding argument that raises RangeError for
# values above 255, so the raw Integer is handed back instead.
# NOTE(review): presumably the caller appends the result as a byte; confirm.
def escape_build(value, control, meta)
  value &= 0x9f if control
  value |= 0x80 if meta
  value
end
# Read an escape out of the string scanner, given the control and meta
@ -920,15 +771,6 @@ module Prism
end
end
<<<<<<< HEAD
=======
>>>>>>> bd3dd2b62a (Fix parser translator tokens for %-arrays with whitespace escapes)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
=======
>>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
=======
>>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
# In a percent array, certain whitespace can be preceded with a backslash,
# causing the following characters to be part of the previous element.
def percent_array_unescape(string)
@ -953,17 +795,6 @@ module Prism
# Determine if characters preceded by a backslash should be escaped or not.
#
# quote - String opening delimiter of the literal (e.g. `"`, `'`, `%q(`).
#
# Returns false for single-quoted literals and for the %q, %w, %i and %s
# percent literals; true otherwise.
def interpolation?(quote)
  !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
end
# Regexps allow interpolation but are handled differently during unescaping.
#
# quote - String opening delimiter of the literal.
#
# Returns true when the delimiter is `/` or starts with `%r`.
# NOTE(review): this definition came from one side of a merge conflict; confirm
# that `regexp?` is not defined a second time further down the file.
def regexp?(quote)
  quote == "/" || quote.start_with?("%r")
end
# Regexps allow interpolation but are handled differently during unescaping

View File

@ -2,11 +2,7 @@
source "https://rubygems.org"
<<<<<<< HEAD:tool/bundler/rubocop_gems.rb
gem "rubocop", ">= 1.52.1", "< 2"
=======
ruby "~> 3.3.5", engine: "truffleruby", engine_version: "~> 24.2.0"
>>>>>>> 2afe89f8ce (Update truffleruby version):gemfiles/truffleruby/Gemfile
gem "minitest"
gem "irb"