From 0f076fa520999a42d88a081d7674495db2f65b3f Mon Sep 17 00:00:00 2001 From: Koichi ITO Date: Thu, 14 Mar 2024 00:37:16 +0900 Subject: [PATCH] [ruby/prism] Fix an AST and token incompatibility for `Prism::Translation::Parser` This PR fixes an AST and token incompatibility between the Parser gem and `Prism::Translation::Parser` for an empty xstring literal. ## Parser gem (Expected) ```console $ bundle exec ruby -Ilib -rparser/ruby33 -ve \ 'buf = Parser::Source::Buffer.new("/tmp/s.rb"); buf.source = "``"; p Parser::Ruby33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr), [], [[:tXSTRING_BEG, ["`", #]], [:tSTRING_END, ["`", #]]]] ``` ## `Prism::Translation::Parser` (Actual) Previously, the AST and tokens differed from those returned by the Parser gem: ```console $ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \ 'buf = Parser::Source::Buffer.new("/tmp/s.rb"); buf.source = "``"; p Prism::Translation::Parser33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr, s(:str, "")), [], [[:tBACK_REF2, ["`", #]], [:tSTRING_END, ["`", #]]]] ``` After this correction, the AST and tokens match those returned by the Parser gem: ```console $ bundle exec ruby -Ilib -rprism -rprism/translation/parser33 -ve \ 'buf = Parser::Source::Buffer.new("/tmp/s.rb"); buf.source = "``"; p Prism::Translation::Parser33.new.tokenize(buf)' ruby 3.3.0 (2023-12-25 revision https://github.com/ruby/prism/commit/5124f9ac75) [x86_64-darwin22] [s(:xstr), [], [[:tXSTRING_BEG, ["`", #]], [:tSTRING_END, ["`", #]]]] ``` https://github.com/ruby/prism/commit/4ac89dcbb5 --- lib/prism/translation/parser/compiler.rb | 2 +- lib/prism/translation/parser/lexer.rb | 2 +- test/prism/fixtures/xstring.txt | 4 ++++ test/prism/snapshots/xstring.txt | 28 +++++++++++++++++------- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git 
a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index cf628cddd4..50c47c068e 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -1678,7 +1678,7 @@ module Prism children, closing = visit_heredoc(node.to_interpolated) builder.xstring_compose(token(node.opening_loc), children, closing) else - parts = if node.unescaped.lines.count <= 1 + parts = if node.unescaped.lines.one? [builder.string_internal([node.unescaped, srange(node.content_loc)])] else start_offset = node.content_loc.start_offset diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 92495ab3d2..9cf86476ba 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -326,7 +326,7 @@ module Prism type = :tIDENTIFIER end when :tXSTRING_BEG - if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT + if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END type = :tBACK_REF2 end end diff --git a/test/prism/fixtures/xstring.txt b/test/prism/fixtures/xstring.txt index fcbaad91ae..7ec09468d8 100644 --- a/test/prism/fixtures/xstring.txt +++ b/test/prism/fixtures/xstring.txt @@ -7,3 +7,7 @@ %x{ foo } + +`` + +%x{} diff --git a/test/prism/snapshots/xstring.txt b/test/prism/snapshots/xstring.txt index 56ba77a8c0..04b4cbf6ea 100644 --- a/test/prism/snapshots/xstring.txt +++ b/test/prism/snapshots/xstring.txt @@ -1,8 +1,8 @@ -@ ProgramNode (location: (1,0)-(9,1)) +@ ProgramNode (location: (1,0)-(13,4)) ├── locals: [] └── statements: - @ StatementsNode (location: (1,0)-(9,1)) - └── body: (length: 4) + @ StatementsNode (location: (1,0)-(13,4)) + └── body: (length: 6) ├── @ XStringNode (location: (1,0)-(1,7)) │ ├── flags: ∅ │ ├── opening_loc: (1,0)-(1,3) = "%x[" @@ -47,9 +47,21 @@ │ ├── content_loc: (5,1)-(5,4) = "foo" │ ├── closing_loc: (5,4)-(5,5) = "`" │ └── unescaped: "foo" - └── @ 
XStringNode (location: (7,0)-(9,1)) + ├── @ XStringNode (location: (7,0)-(9,1)) + │ ├── flags: ∅ + │ ├── opening_loc: (7,0)-(7,3) = "%x{" + │ ├── content_loc: (7,3)-(9,0) = "\n foo\n" + │ ├── closing_loc: (9,0)-(9,1) = "}" + │ └── unescaped: "\n foo\n" + ├── @ XStringNode (location: (11,0)-(11,2)) + │ ├── flags: ∅ + │ ├── opening_loc: (11,0)-(11,1) = "`" + │ ├── content_loc: (11,1)-(11,1) = "" + │ ├── closing_loc: (11,1)-(11,2) = "`" + │ └── unescaped: "" + └── @ XStringNode (location: (13,0)-(13,4)) ├── flags: ∅ - ├── opening_loc: (7,0)-(7,3) = "%x{" - ├── content_loc: (7,3)-(9,0) = "\n foo\n" - ├── closing_loc: (9,0)-(9,1) = "}" - └── unescaped: "\n foo\n" + ├── opening_loc: (13,0)-(13,3) = "%x{" + ├── content_loc: (13,3)-(13,3) = "" + ├── closing_loc: (13,3)-(13,4) = "}" + └── unescaped: ""