From 2af6bc26c593951fc591a9d77461d092fab728d8 Mon Sep 17 00:00:00 2001 From: Koichi ITO Date: Sat, 2 Mar 2024 00:43:15 +0900 Subject: [PATCH] [ruby/prism] Fix an AST and token incompatibility for `Prism::Translation::Parser` Fixes https://github.com/ruby/prism/pull/2515. This PR fixes an AST and token incompatibility between the Parser gem and `Prism::Translation::Parser` for string literals that contain line breaks. https://github.com/ruby/prism/commit/c58466e5bf --- lib/prism/translation/parser/compiler.rb | 16 +++++++++++++++- lib/prism/translation/parser/lexer.rb | 2 +- test/prism/fixtures/dstring.txt | 2 ++ test/prism/snapshots/dstring.txt | 11 +++++++++++ 4 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 test/prism/fixtures/dstring.txt create mode 100644 test/prism/snapshots/dstring.txt diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index d64e382c78..bf5b85441b 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -1487,9 +1487,23 @@ module Prism elsif node.opening == "?" 
builder.character([node.unescaped, srange(node.location)]) else + parts = if node.unescaped.lines.count <= 1 + [builder.string_internal([node.unescaped, srange(node.content_loc)])] + else + start_offset = node.content_loc.start_offset + + node.unescaped.lines.map do |line| + end_offset = start_offset + line.length + offsets = srange_offsets(start_offset, end_offset) + start_offset = end_offset + + builder.string_internal([line, offsets]) + end + end + builder.string_compose( token(node.opening_loc), - [builder.string_internal([node.unescaped, srange(node.content_loc)])], + parts, token(node.closing_loc) ) end diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index b28273b03f..8bb3084577 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -281,7 +281,7 @@ module Prism value = "" location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset]) index += 1 - elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END + elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END next_location = token.location.join(next_next_token.location) type = :tSTRING value = next_token.value diff --git a/test/prism/fixtures/dstring.txt b/test/prism/fixtures/dstring.txt new file mode 100644 index 0000000000..b7a0958d3f --- /dev/null +++ b/test/prism/fixtures/dstring.txt @@ -0,0 +1,2 @@ +"foo + bar" diff --git a/test/prism/snapshots/dstring.txt b/test/prism/snapshots/dstring.txt new file mode 100644 index 0000000000..a24eaf193a --- /dev/null +++ b/test/prism/snapshots/dstring.txt @@ -0,0 +1,11 @@ +@ ProgramNode (location: (1,0)-(2,6)) +├── locals: [] +└── 
statements: + @ StatementsNode (location: (1,0)-(2,6)) + └── body: (length: 1) + └── @ StringNode (location: (1,0)-(2,6)) + ├── flags: ∅ + ├── opening_loc: (1,0)-(1,1) = "\"" + ├── content_loc: (1,1)-(2,5) = "foo\n bar" + ├── closing_loc: (2,5)-(2,6) = "\"" + └── unescaped: "foo\n bar"