[ruby/prism] Fix a token incompatibility for Prism::Translation::Parser::Lexer

This PR fixes a token incompatibility between the Parser gem and `Prism::Translation::Parser`
for the `heredocs_leading_whitespace.txt` test.

https://github.com/ruby/prism/commit/7d45fb1eed
This commit is contained in:
Koichi ITO 2024-03-16 00:30:01 +09:00 committed by git
parent aceee71c35
commit c9da8d67fd
4 changed files with 36 additions and 21 deletions

View File

@ -217,6 +217,8 @@ module Prism
index = 0
length = lexed.length
heredoc_identifier_stack = []
while index < length
token, state = lexed[index]
index += 1
@ -275,6 +277,9 @@ module Prism
when :tSPACE
value = nil
when :tSTRING_BEG
if token.type == :HEREDOC_START
heredoc_identifier_stack.push(value.match(/<<[-~]?["']?(?<heredoc_identifier>.*?)["']?\z/)[:heredoc_identifier])
end
if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
next_location = token.location.join(next_token.location)
type = :tSTRING
@ -322,7 +327,7 @@ module Prism
when :tSTRING_END
if token.type == :HEREDOC_END && value.end_with?("\n")
newline_length = value.end_with?("\r\n") ? 2 : 1
value = value.sub(/\r?\n\z/, '')
value = heredoc_identifier_stack.pop
location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
elsif token.type == :REGEXP_END
value = value[0]

View File

@ -3,6 +3,11 @@ a
b
FOO
<<-" FOO"
a
b
FOO
<<-' FOO'
a
b

View File

@ -73,7 +73,6 @@ module Prism
skip_tokens = [
"comments.txt",
"heredoc_with_comment.txt",
"heredocs_leading_whitespace.txt",
"indented_file_end.txt",
"strings.txt",
"xstring_with_backslash.txt"

View File

@ -1,8 +1,8 @@
@ ProgramNode (location: (1,0)-(16,10))
@ ProgramNode (location: (1,0)-(21,10))
├── locals: []
└── statements:
@ StatementsNode (location: (1,0)-(16,10))
└── body: (length: 4)
@ StatementsNode (location: (1,0)-(21,10))
└── body: (length: 5)
├── @ StringNode (location: (1,0)-(1,10))
│ ├── flags: ∅
│ ├── opening_loc: (1,0)-(1,10) = "<<-' FOO'"
@ -11,39 +11,45 @@
│ └── unescaped: "a\nb\n"
├── @ StringNode (location: (6,0)-(6,10))
│ ├── flags: ∅
│ ├── opening_loc: (6,0)-(6,10) = "<<-' FOO'"
│ ├── opening_loc: (6,0)-(6,10) = "<<-\" FOO\""
│ ├── content_loc: (7,0)-(9,0) = "a\nb\n"
│ ├── closing_loc: (9,0)-(10,0) = " FOO\n"
│ ├── closing_loc: (9,0)-(10,0) = " FOO\n"
│ └── unescaped: "a\nb\n"
├── @ InterpolatedStringNode (location: (11,0)-(11,10))
│ ├── opening_loc: (11,0)-(11,10) = "<<~' FOO'"
├── @ StringNode (location: (11,0)-(11,10))
│ ├── flags: ∅
│ ├── opening_loc: (11,0)-(11,10) = "<<-' FOO'"
│ ├── content_loc: (12,0)-(14,0) = "a\nb\n"
│ ├── closing_loc: (14,0)-(15,0) = " FOO\n"
│ └── unescaped: "a\nb\n"
├── @ InterpolatedStringNode (location: (16,0)-(16,10))
│ ├── opening_loc: (16,0)-(16,10) = "<<~' FOO'"
│ ├── parts: (length: 2)
│ │ ├── @ StringNode (location: (12,0)-(13,0))
│ │ ├── @ StringNode (location: (17,0)-(18,0))
│ │ │ ├── flags: ∅
│ │ │ ├── opening_loc: ∅
│ │ │ ├── content_loc: (12,0)-(13,0) = "a\n"
│ │ │ ├── content_loc: (17,0)-(18,0) = "a\n"
│ │ │ ├── closing_loc: ∅
│ │ │ └── unescaped: "a\n"
│ │ └── @ StringNode (location: (13,0)-(14,0))
│ │ └── @ StringNode (location: (18,0)-(19,0))
│ │ ├── flags: ∅
│ │ ├── opening_loc: ∅
│ │ ├── content_loc: (13,0)-(14,0) = "b\n"
│ │ ├── content_loc: (18,0)-(19,0) = "b\n"
│ │ ├── closing_loc: ∅
│ │ └── unescaped: "b\n"
│ └── closing_loc: (14,0)-(15,0) = " FOO\n"
└── @ InterpolatedStringNode (location: (16,0)-(16,10))
├── opening_loc: (16,0)-(16,10) = "<<~' FOO'"
│ └── closing_loc: (19,0)-(20,0) = " FOO\n"
└── @ InterpolatedStringNode (location: (21,0)-(21,10))
├── opening_loc: (21,0)-(21,10) = "<<~' FOO'"
├── parts: (length: 2)
│ ├── @ StringNode (location: (17,0)-(18,0))
│ ├── @ StringNode (location: (22,0)-(23,0))
│ │ ├── flags: ∅
│ │ ├── opening_loc: ∅
│ │ ├── content_loc: (17,0)-(18,0) = "a\n"
│ │ ├── content_loc: (22,0)-(23,0) = "a\n"
│ │ ├── closing_loc: ∅
│ │ └── unescaped: "a\n"
│ └── @ StringNode (location: (18,0)-(19,0))
│ └── @ StringNode (location: (23,0)-(24,0))
│ ├── flags: ∅
│ ├── opening_loc: ∅
│ ├── content_loc: (18,0)-(19,0) = "b\n"
│ ├── content_loc: (23,0)-(24,0) = "b\n"
│ ├── closing_loc: ∅
│ └── unescaped: "b\n"
└── closing_loc: (19,0)-(20,0) = " FOO\n"
└── closing_loc: (24,0)-(25,0) = " FOO\n"