[ruby/prism] Add parser translation

https://github.com/ruby/prism/commit/8cdec8070c
2024-01-10 11:04:39 -05:00 · 2024-01-10 11:04:39 -05:00 · f12ebe1188
commit f12ebe1188
parent 223910b329
10 changed files with 2516 additions and 5 deletions
--- a/lib/prism.rb
+++ b/lib/prism.rb
@ -26,6 +26,7 @@ module Prism
  autoload :Pack, "prism/pack"
  autoload :Pattern, "prism/pattern"
  autoload :Serialize, "prism/serialize"
  autoload :Translation, "prism/translation"
  autoload :Visitor, "prism/visitor"
  # Some of these constants are not meant to be exposed, so marking them as
--- a/lib/prism/node_ext.rb
+++ b/lib/prism/node_ext.rb
@ -81,7 +81,7 @@ module Prism
  class RationalNode < Node
    # Returns the value of the node as a Ruby Rational.
    def value
-      Rational(numeric.is_a?(IntegerNode) && !numeric.decimal? ? numeric.value : slice.chomp("r"))
+      Rational(numeric.is_a?(IntegerNode) ? numeric.value : slice.chomp("r"))
    end
  end
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
    "docs/javascript.md",
    "docs/local_variable_depth.md",
    "docs/mapping.md",
    "docs/parser_translation.md",
    "docs/parsing_rules.md",
    "docs/releasing.md",
    "docs/ripper.md",
@ -74,16 +75,21 @@ Gem::Specification.new do |spec|
    "lib/prism/ffi.rb",
    "lib/prism/lex_compat.rb",
    "lib/prism/mutation_compiler.rb",
    "lib/prism/node.rb",
    "lib/prism/node_ext.rb",
    "lib/prism/node_inspector.rb",
    "lib/prism/node.rb",
    "lib/prism/pack.rb",
    "lib/prism/parse_result.rb",
    "lib/prism/parse_result/comments.rb",
    "lib/prism/parse_result/newlines.rb",
    "lib/prism/pattern.rb",
    "lib/prism/ripper_compat.rb",
    "lib/prism/serialize.rb",
-    "lib/prism/parse_result/comments.rb",
+    "lib/prism/translation.rb",
-    "lib/prism/parse_result/newlines.rb",
+    "lib/prism/translation/parser.rb",
    "lib/prism/translation/parser/compiler.rb",
    "lib/prism/translation/parser/lexer.rb",
    "lib/prism/translation/parser/rubocop.rb",
    "lib/prism/visitor.rb",
    "src/diagnostic.c",
    "src/encoding.c",
--- a/lib/prism/translation.rb
+++ b/lib/prism/translation.rb
@ -0,0 +1,11 @@
 # frozen_string_literal: true
 module Prism
  # This module is responsible for converting the prism syntax tree into other
  # syntax trees. At the moment it only supports converting to the
  # whitequark/parser gem's syntax tree, but support is planned for the
  # seattlerb/ruby_parser gem's syntax tree as well.
  module Translation
    # Registered with autoload so that "prism/translation/parser" (which itself
    # requires the parser gem) is only loaded when the constant is first used.
    autoload :Parser, "prism/translation/parser"
  end
 end
--- a/lib/prism/translation/parser.rb
+++ b/lib/prism/translation/parser.rb
@ -0,0 +1,136 @@
 # frozen_string_literal: true
 require "parser"
 module Prism
  module Translation
    # This class is the entry-point for converting a prism syntax tree into the
    # whitequark/parser gem's syntax tree. It inherits from the base parser for
    # the parser gem, and overrides the parse* methods to parse with prism and
    # then translate.
    class Parser < ::Parser::Base
      Racc_debug_parser = false # :nodoc:
      # Always returns 33.
      # NOTE(review): presumably this denotes Ruby 3.3 (compare VERSION_3_3 in
      # parser/rubocop.rb) following the parser gem's convention — confirm.
      def version # :nodoc:
        33
      end
      # The default encoding for Ruby files is UTF-8.
      #
      # Returns the Encoding::UTF_8 constant; takes no arguments.
      def default_encoding
        Encoding::UTF_8
      end
      # Intentionally empty: does nothing and returns nil.
      # NOTE(review): presumably overrides an error hook inherited from
      # ::Parser::Base that is irrelevant when prism does the parsing — confirm.
      def yyerror # :nodoc:
      end
      # Parse the contents of the given source buffer with prism and translate
      # the resulting tree into a parser gem AST, which is returned. The buffer
      # is stored in @source_buffer only for the duration of the call.
      def parse(source_buffer)
        @source_buffer = source_buffer
        code = source_buffer.source
        program = Prism.parse(code, filepath: source_buffer.name).value
        offsets = build_offset_cache(code)
        build_ast(program, offsets)
      ensure
        # Always drop the reference, even when translation raises.
        @source_buffer = nil
      end
      # Parse the contents of the given source buffer with prism and return a
      # two-element array: the translated AST followed by the translated source
      # code comments.
      def parse_with_comments(source_buffer)
        @source_buffer = source_buffer
        code = source_buffer.source
        offsets = build_offset_cache(code)
        parsed = Prism.parse(code, filepath: source_buffer.name)
        ast = build_ast(parsed.value, offsets)
        comments = build_comments(parsed.comments, offsets)
        [ast, comments]
      ensure
        # Always drop the reference, even when translation raises.
        @source_buffer = nil
      end
      # Parse and lex the contents of the given source buffer with prism and
      # return a three-element array: the translated AST, the translated
      # comments, and the translated tokens. The second parameter is accepted
      # but not used.
      def tokenize(source_buffer, _recover = false)
        @source_buffer = source_buffer
        code = source_buffer.source
        offsets = build_offset_cache(code)
        lex_result = Prism.parse_lex(code, filepath: source_buffer.name)
        program, prism_tokens = lex_result.value
        ast = build_ast(program, offsets)
        comments = build_comments(lex_result.comments, offsets)
        [ast, comments, build_tokens(prism_tokens, offsets)]
      ensure
        # Always drop the reference, even when translation raises.
        @source_buffer = nil
      end
      # Prism already resolves numbered parameters, so nothing is declared
      # here. The method only reports whether the first child of the given
      # node looks like a numbered parameter name (_1 through _9).
      def try_declare_numparam(node)
        name = node.children.first
        name.match?(/\A_[1-9]\z/)
      end
      private
      # Prism reports offsets in bytes whereas the parser gem expects offsets
      # in characters, so every offset has to be converted before it can be
      # used to build the parser gem AST.
      #
      # The returned object answers [] with the character offset for a byte
      # offset. When the source has as many bytes as characters the offsets
      # are identical and an identity lambda is returned. Otherwise a Hash is
      # returned that computes each conversion on first access and remembers
      # the result.
      #
      # Each uncached lookup slices the source from its start, so sources with
      # multi-byte characters can be slow to translate. A different data
      # structure for the cache could make this significantly faster.
      def build_offset_cache(source)
        return -> (offset) { offset } if source.bytesize == source.length

        Hash.new do |cache, byte_offset|
          cache[byte_offset] = source.byteslice(0, byte_offset).length
        end
      end
      # Translate the prism program node into a parser gem AST by having it
      # accept a Compiler that is bound to this parser and the offset cache.
      def build_ast(program, offset_cache)
        compiler = Compiler.new(self, offset_cache)
        program.accept(compiler)
      end
      # Translate the prism comments into parser gem comments. Each comment's
      # byte offsets are converted through the offset cache and wrapped in a
      # source range over the current source buffer.
      def build_comments(comments, offset_cache)
        comments.map do |comment|
          loc = comment.location
          begin_pos = offset_cache[loc.start_offset]
          end_pos = offset_cache[loc.end_offset]
          range = ::Parser::Source::Range.new(source_buffer, begin_pos, end_pos)
          ::Parser::Source::Comment.new(range)
        end
      end
      # Translate the prism lexer output into parser gem tokens. Only the
      # first element of each lexed entry is handed to the Lexer.
      def build_tokens(tokens, offset_cache)
        prism_tokens = tokens.map(&:first)
        Lexer.new(source_buffer, prism_tokens, offset_cache).to_a
      end
      require_relative "parser/compiler"
      require_relative "parser/lexer"
      private_constant :Compiler
      private_constant :Lexer
    end
  end
 end
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@ -0,0 +1,335 @@
 # frozen_string_literal: true
 module Prism
  module Translation
    class Parser
      # Accepts a list of prism tokens and converts them into the expected
      # format for the parser gem.
      class Lexer
        # The direct translation of token types between the two lexers. Keys
        # are prism token types and values are the matching parser gem token
        # types. A nil value marks a prism token that has no counterpart in the
        # parser gem's token stream. The table is frozen because it is shared
        # by every lexer instance.
        TYPES = {
          # These tokens should never appear in the output of the lexer.
          EOF: nil,
          MISSING: nil,
          NOT_PROVIDED: nil,
          IGNORED_NEWLINE: nil,
          EMBDOC_END: nil,
          EMBDOC_LINE: nil,
          __END__: nil,
          # These tokens have more or less direct mappings.
          AMPERSAND: :tAMPER2,
          AMPERSAND_AMPERSAND: :tANDOP,
          AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
          AMPERSAND_DOT: :tANDDOT,
          AMPERSAND_EQUAL: :tOP_ASGN,
          BACK_REFERENCE: :tBACK_REF,
          BACKTICK: :tXSTRING_BEG,
          BANG: :tBANG,
          BANG_EQUAL: :tNEQ,
          BANG_TILDE: :tNMATCH,
          BRACE_LEFT: :tLCURLY,
          BRACE_RIGHT: :tRCURLY,
          BRACKET_LEFT: :tLBRACK2,
          BRACKET_LEFT_ARRAY: :tLBRACK,
          BRACKET_LEFT_RIGHT: :tAREF,
          BRACKET_LEFT_RIGHT_EQUAL: :tASET,
          BRACKET_RIGHT: :tRBRACK,
          CARET: :tCARET,
          CARET_EQUAL: :tOP_ASGN,
          CHARACTER_LITERAL: :tCHARACTER,
          CLASS_VARIABLE: :tCVAR,
          COLON: :tCOLON,
          COLON_COLON: :tCOLON2,
          COMMA: :tCOMMA,
          COMMENT: :tCOMMENT,
          CONSTANT: :tCONSTANT,
          DOT: :tDOT,
          DOT_DOT: :tDOT2,
          DOT_DOT_DOT: :tDOT3,
          EMBDOC_BEGIN: :tCOMMENT,
          EMBEXPR_BEGIN: :tSTRING_DBEG,
          EMBEXPR_END: :tSTRING_DEND,
          EMBVAR: :tSTRING_DVAR,
          EQUAL: :tEQL,
          EQUAL_EQUAL: :tEQ,
          EQUAL_EQUAL_EQUAL: :tEQQ,
          EQUAL_GREATER: :tASSOC,
          EQUAL_TILDE: :tMATCH,
          FLOAT: :tFLOAT,
          FLOAT_IMAGINARY: :tIMAGINARY,
          FLOAT_RATIONAL: :tRATIONAL,
          FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
          GLOBAL_VARIABLE: :tGVAR,
          GREATER: :tGT,
          GREATER_EQUAL: :tGEQ,
          GREATER_GREATER: :tRSHFT,
          GREATER_GREATER_EQUAL: :tOP_ASGN,
          HEREDOC_START: :tSTRING_BEG,
          HEREDOC_END: :tSTRING_END,
          IDENTIFIER: :tIDENTIFIER,
          INSTANCE_VARIABLE: :tIVAR,
          INTEGER: :tINTEGER,
          INTEGER_IMAGINARY: :tIMAGINARY,
          INTEGER_RATIONAL: :tRATIONAL,
          INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
          KEYWORD_ALIAS: :kALIAS,
          KEYWORD_AND: :kAND,
          KEYWORD_BEGIN: :kBEGIN,
          KEYWORD_BEGIN_UPCASE: :klBEGIN,
          KEYWORD_BREAK: :kBREAK,
          KEYWORD_CASE: :kCASE,
          KEYWORD_CLASS: :kCLASS,
          KEYWORD_DEF: :kDEF,
          KEYWORD_DEFINED: :kDEFINED,
          KEYWORD_DO: :kDO,
          KEYWORD_DO_LOOP: :kDO_COND,
          KEYWORD_END: :kEND,
          KEYWORD_END_UPCASE: :klEND,
          KEYWORD_ENSURE: :kENSURE,
          KEYWORD_ELSE: :kELSE,
          KEYWORD_ELSIF: :kELSIF,
          KEYWORD_FALSE: :kFALSE,
          KEYWORD_FOR: :kFOR,
          KEYWORD_IF: :kIF,
          KEYWORD_IF_MODIFIER: :kIF_MOD,
          KEYWORD_IN: :kIN,
          KEYWORD_MODULE: :kMODULE,
          KEYWORD_NEXT: :kNEXT,
          KEYWORD_NIL: :kNIL,
          KEYWORD_NOT: :kNOT,
          KEYWORD_OR: :kOR,
          KEYWORD_REDO: :kREDO,
          KEYWORD_RESCUE: :kRESCUE,
          KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
          KEYWORD_RETRY: :kRETRY,
          KEYWORD_RETURN: :kRETURN,
          KEYWORD_SELF: :kSELF,
          KEYWORD_SUPER: :kSUPER,
          KEYWORD_THEN: :kTHEN,
          KEYWORD_TRUE: :kTRUE,
          KEYWORD_UNDEF: :kUNDEF,
          KEYWORD_UNLESS: :kUNLESS,
          KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
          KEYWORD_UNTIL: :kUNTIL,
          KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
          KEYWORD_WHEN: :kWHEN,
          KEYWORD_WHILE: :kWHILE,
          KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
          KEYWORD_YIELD: :kYIELD,
          KEYWORD___ENCODING__: :k__ENCODING__,
          KEYWORD___FILE__: :k__FILE__,
          KEYWORD___LINE__: :k__LINE__,
          LABEL: :tLABEL,
          LABEL_END: :tLABEL_END,
          LAMBDA_BEGIN: :tLAMBEG,
          LESS: :tLT,
          LESS_EQUAL: :tLEQ,
          LESS_EQUAL_GREATER: :tCMP,
          LESS_LESS: :tLSHFT,
          LESS_LESS_EQUAL: :tOP_ASGN,
          METHOD_NAME: :tFID,
          MINUS: :tMINUS,
          MINUS_EQUAL: :tOP_ASGN,
          MINUS_GREATER: :tLAMBDA,
          NEWLINE: :tNL,
          NUMBERED_REFERENCE: :tNTH_REF,
          PARENTHESIS_LEFT: :tLPAREN,
          PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
          PARENTHESIS_RIGHT: :tRPAREN,
          PERCENT: :tPERCENT,
          PERCENT_EQUAL: :tOP_ASGN,
          PERCENT_LOWER_I: :tQSYMBOLS_BEG,
          PERCENT_LOWER_W: :tQWORDS_BEG,
          PERCENT_UPPER_I: :tSYMBOLS_BEG,
          PERCENT_UPPER_W: :tWORDS_BEG,
          PERCENT_LOWER_X: :tXSTRING_BEG,
          PLUS: :tPLUS,
          PLUS_EQUAL: :tOP_ASGN,
          PIPE_EQUAL: :tOP_ASGN,
          PIPE: :tPIPE,
          PIPE_PIPE: :tOROP,
          PIPE_PIPE_EQUAL: :tOP_ASGN,
          QUESTION_MARK: :tEH,
          REGEXP_BEGIN: :tREGEXP_BEG,
          REGEXP_END: :tSTRING_END,
          SEMICOLON: :tSEMI,
          SLASH: :tDIVIDE,
          SLASH_EQUAL: :tOP_ASGN,
          STAR: :tSTAR2,
          STAR_EQUAL: :tOP_ASGN,
          STAR_STAR: :tPOW,
          STAR_STAR_EQUAL: :tOP_ASGN,
          STRING_BEGIN: :tSTRING_BEG,
          STRING_CONTENT: :tSTRING_CONTENT,
          STRING_END: :tSTRING_END,
          SYMBOL_BEGIN: :tSYMBEG,
          TILDE: :tTILDE,
          UAMPERSAND: :tAMPER,
          UCOLON_COLON: :tCOLON3,
          # The range operators at the beginning of an expression (beginless
          # ranges) have their own "B" token types in the parser gem.
          UDOT_DOT: :tBDOT2,
          UDOT_DOT_DOT: :tBDOT3,
          UMINUS: :tUMINUS,
          UMINUS_NUM: :tUNARY_NUM,
          UPLUS: :tUPLUS,
          USTAR: :tSTAR,
          USTAR_STAR: :tPOW,
          WORDS_SEP: :tSPACE
        }.freeze
        private_constant :TYPES
        # The Parser::Source::Buffer that the tokens were lexed from.
        attr_reader :source_buffer
        # An array of prism tokens that we lexed.
        attr_reader :lexed
        # An object that maps byte offsets to character offsets through []
        # (either a Hash or a lambda, depending on the source).
        attr_reader :offset_cache
        # Initialize the lexer with the given source buffer, prism tokens, and
        # offset cache.
        #
        # source_buffer:: used as the first argument of every Range built in to_a.
        # lexed:: the prism tokens to convert, indexed positionally by to_a.
        # offset_cache:: looked up with [] to convert each token offset before
        #                it is placed in a Range.
        def initialize(source_buffer, lexed, offset_cache)
          @source_buffer = source_buffer
          @lexed = lexed
          @offset_cache = offset_cache
        end
        Range = ::Parser::Source::Range # :nodoc:
        private_constant :Range
        # Convert the prism tokens into the expected format for the parser gem.
        def to_a
          tokens = []
          index = 0
          while index < lexed.length
            token, = lexed[index]
            index += 1
            next if token.type == :IGNORED_NEWLINE || token.type == :EOF
            type = TYPES.fetch(token.type)
            value = token.value
            location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])
            case type
            when :tCHARACTER
              value.delete_prefix!("?")
            when :tCOMMENT
              if token.type == :EMBDOC_BEGIN
                until (next_token = lexed[index]) && next_token.type == :EMBDOC_END
                  value += next_token.value
                  index += 1
                end
                value += next_token.value
                location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset])
                index += 1
              else
                value.chomp!
                location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
              end
            when :tNL
              value = nil
            when :tFLOAT
              value = Float(value)
            when :tIMAGINARY
              value = parse_complex(value)
            when :tINTEGER
              if value.start_with?("+")
                tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
                location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
              end
              value = Integer(value)
            when :tLABEL
              value.chomp!(":")
            when :tLABEL_END
              value.chomp!(":")
            when :tNTH_REF
              value = Integer(value.delete_prefix("$"))
            when :tOP_ASGN
              value.chomp!("=")
            when :tRATIONAL
              value = parse_rational(value)
            when :tSPACE
              value = nil
            when :tSTRING_BEG
              if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
                next_location = token.location.join(next_token.location)
                type = :tSTRING
                value = ""
                location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
                index += 1
              elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
                next_location = token.location.join(next_next_token.location)
                type = :tSTRING
                value = next_token.value
                location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
                index += 2
              elsif value.start_with?("<<")
                quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
                value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
              end
            when :tSTRING_DVAR
              value = nil
            when :tSTRING_END
              if token.type == :REGEXP_END
                value = value[0]
                location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
              end
            when :tSYMBEG
              if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
                next_location = token.location.join(next_token.location)
                type = :tSYMBOL
                value = next_token.value
                value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
                location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
                index += 1
              end
            when :tFID
              if tokens[-1][0] == :kDEF
                type = :tIDENTIFIER
              end
            end
            tokens << [type, [value, location]]
            if token.type == :REGEXP_END
              tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
            end
          end
          tokens
        end
        private
        # Parse a complex from the string representation.
        #
        # The given string is the source of an imaginary literal (for example
        # "2i", "1.5i", "0x1fi", "017i" or "3ri"). Returns a Complex with a zero
        # real part. The argument is not modified.
        def parse_complex(value)
          imaginary = value.chomp("i")
          if imaginary.end_with?("r")
            Complex(0, parse_rational(imaginary))
          elsif imaginary.match?(/\A0(?:[BbOoDdXx]|_?[0-7])/)
            # Prefixed and leading-zero (octal) integers need Integer(), since
            # Complex() would read their digits as decimal or reject them.
            Complex(0, Integer(imaginary))
          else
            Complex(0, imaginary)
          end
        end
        # Parse a rational from the string representation.
        #
        # The given string is the source of a rational literal (for example
        # "3r", "1.5r", "0x10r" or "017r"); a trailing "r" is optional. Returns
        # a Rational. The argument is not modified.
        def parse_rational(value)
          digits = value.chomp("r")
          if digits.match?(/\A0(?:[BbOoDdXx]|_?[0-7])/)
            # Prefixed and leading-zero (octal) integers need Integer(), since
            # Rational() would read their digits as decimal or reject them.
            Rational(Integer(digits))
          else
            Rational(digits)
          end
        end
      end
    end
  end
 end
--- a/lib/prism/translation/parser/rubocop.rb
+++ b/lib/prism/translation/parser/rubocop.rb
@ -0,0 +1,37 @@
 # frozen_string_literal: true
 require "parser"
 require "rubocop"
 require "prism"
 require "prism/translation/parser"
 module Prism
  module Translation
    class Parser
      # This is the special version number that should be used in rubocop
      # configuration files to trigger using prism.
      VERSION_3_3 = 80_82_73_83_77.33
      # This module gets prepended into RuboCop::AST::ProcessedSource.
      module ProcessedSource
        # Hook into parser_class so that the prism translation parser is used
        # when the requested ruby version is the special prism version
        # number. Every other version is delegated to the original
        # implementation.
        def parser_class(ruby_version)
          return super unless ruby_version == Prism::Translation::Parser::VERSION_3_3

          require "prism/translation/parser"
          Prism::Translation::Parser
        end
      end
    end
  end
 end
 # :stopdoc:
 # Put the prism-aware parser_class in front of RuboCop's own implementation.
 RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)
 # Replace KNOWN_RUBIES with a frozen copy that also contains the prism version
 # number. The constant is removed first and then defined again; remove_const
 # is invoked through send because it is a private method.
 known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES)
 RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES)
 RuboCop::TargetRuby::KNOWN_RUBIES = [*known_rubies, Prism::Translation::Parser::VERSION_3_3].freeze
--- a/test/prism/newline_test.rb
+++ b/test/prism/newline_test.rb
@ -7,7 +7,7 @@ return unless defined?(RubyVM::InstructionSequence)
 module Prism
  class NewlineTest < TestCase
    base = File.expand_path("../", __FILE__)
-    filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb unescape_test.rb]
+    filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb parser_test.rb unescape_test.rb]
    filepaths.each do |relative|
      define_method("test_newline_flags_#{relative}") do
--- a/test/prism/parser_test.rb
+++ b/test/prism/parser_test.rb
@ -0,0 +1,188 @@
 # frozen_string_literal: true
 require_relative "test_helper"
 begin
  require "parser/current"
 rescue LoadError
  # In CRuby's CI, we're not going to test against the parser gem because we
  # don't want to have to install it. So in this case we'll just skip this test.
  return
 end
 # First, opt in to every AST feature.
 Parser::Builders::Default.modernize
 # Modify the source map == check so that it doesn't check against the node
 # itself so we don't get into a recursive loop.
 Parser::Source::Map.prepend(
  Module.new {
    # Two maps are equal when they have the same class and every instance
    # variable of the receiver other than @node compares equal.
    def ==(other)
      self.class == other.class &&
        (instance_variables - %i[@node]).map do |ivar|
          instance_variable_get(ivar) == other.instance_variable_get(ivar)
        end.reduce(:&) # combines all results with &; nil when there is nothing to compare
    end
  }
 )
 # Next, ensure that we're comparing the nodes and also comparing the source
 # ranges so that we're getting all of the necessary information.
 Parser::AST::Node.prepend(
  Module.new {
    # Equal only when the original comparison succeeds and both locations
    # compare equal as well.
    def ==(other)
      super && (location == other.location)
    end
  }
 )
 module Prism
  class ParserTest < TestCase
    base = File.join(__dir__, "fixtures")
    # These files are either failing to parse or failing to translate, so we'll
    # skip them for now.
    skip_all = %w[
      arrays.txt
      constants.txt
      dash_heredocs.txt
      dos_endings.txt
      embdoc_no_newline_at_end.txt
      heredocs_with_ignored_newlines.txt
      regex.txt
      spanning_heredoc.txt
      spanning_heredoc_newlines.txt
      tilde_heredocs.txt
      unescaping.txt
    ]
    # Not sure why these files are failing on JRuby, but skipping them for now.
    if RUBY_ENGINE == "jruby"
      skip_all.push("emoji_method_calls.txt", "symbols.txt")
    end
    # These files are failing to translate their lexer output into the lexer
    # output expected by the parser gem, so we'll skip them for now.
    skip_tokens = %w[
      comments.txt
      endless_range_in_conditional.txt
      heredoc_with_comment.txt
      heredoc_with_escaped_newline_at_start.txt
      heredocs_leading_whitespace.txt
      heredocs_nested.txt
      heredocs_with_ignored_newlines_and_non_empty.txt
      indented_file_end.txt
      non_alphanumeric_methods.txt
      range_begin_open_inclusive.txt
      single_quote_heredocs.txt
      strings.txt
      xstring.txt
    ]
    # Define one test method per fixture file that is not listed in skip_all.
    # Token comparison is turned off for the fixtures listed in skip_tokens.
    Dir["*.txt", base: base].each do |name|
      next if skip_all.include?(name)
      define_method("test_#{name}") do
        assert_equal_parses(File.join(base, name), compare_tokens: !skip_tokens.include?(name))
      end
    end
    private
    # Tokenize the file at the given path with both the parser gem and the
    # prism translation layer and assert that the ASTs, the comments and
    # (unless compare_tokens is false) the tokens are equal.
    #
    # Returns early without asserting anything when the parser gem itself
    # raises ArgumentError or Parser::SyntaxError for the file.
    def assert_equal_parses(filepath, compare_tokens: true)
      buffer = Parser::Source::Buffer.new(filepath, 1)
      buffer.source = File.read(filepath)
      parser = Parser::CurrentRuby.default_parser
      # Discard diagnostics but have every error raise.
      parser.diagnostics.consumer = ->(*) {}
      parser.diagnostics.all_errors_are_fatal = true
      expected_ast, expected_comments, expected_tokens =
        begin
          parser.tokenize(buffer)
        rescue ArgumentError, Parser::SyntaxError
          return
        end
      actual_ast, actual_comments, actual_tokens =
        Prism::Translation::Parser.new.tokenize(buffer)
      assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
      assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens
      assert_equal_comments(expected_comments, actual_comments)
    end
    # Build the failure message for two ASTs that are expected to be equal.
    # Both trees are walked breadth-first in lockstep, and the message
    # describes the first pair of nodes that differ in type, in location, or
    # (for :str nodes) in their first child. When no such pair is found the
    # message shows both complete trees.
    def assert_equal_asts_message(expected_ast, actual_ast)
      pending = [[expected_ast, actual_ast]]
      until pending.empty?
        expected_node, actual_node = pending.shift
        if expected_node.type != actual_node.type
          return "expected: #{expected_node.type}\nactual: #{actual_node.type}"
        elsif expected_node.location != actual_node.location
          return "expected:\n#{expected_node.inspect}\n#{expected_node.location}\nactual:\n#{actual_node.inspect}\n#{actual_node.location}"
        elsif expected_node.type == :str && expected_node.children[0] != actual_node.children[0]
          return "expected: #{expected_node.inspect}\nactual: #{actual_node.inspect}"
        end
        # Only descend into children that are nodes on the expected side.
        actual_children = actual_node.children
        expected_node.children.each_with_index do |expected_child, position|
          next unless expected_child.is_a?(Parser::AST::Node)
          pending << [expected_child, actual_children[position]]
        end
      end
      "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
    end
    # Assert that the tokens produced by the translation layer match the ones
    # produced by the parser gem. Nothing is checked when the two lists are
    # already equal; otherwise they are walked in lockstep and compared pair
    # by pair after normalizing a few context-dependent token types.
    #
    # NOTE(review): the loop is bounded by expected_tokens only, so it looks
    # like a shorter actual_tokens list would fail with a nil error and extra
    # trailing actual tokens would go unnoticed — confirm.
    def assert_equal_tokens(expected_tokens, actual_tokens)
      if expected_tokens != actual_tokens
        expected_index = 0
        actual_index = 0
        while expected_index < expected_tokens.length
          expected_token = expected_tokens[expected_index]
          actual_token = actual_tokens[actual_index]
          expected_index += 1
          actual_index += 1
          # The parser gem always has a space before a string end in list
          # literals, but we don't. So we'll skip over the space.
          if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END
            # Both indexes were already advanced above, so this extra step
            # also moves past the expected token that follows the space.
            expected_index += 1
            next
          end
          # There are a lot of tokens that have very specific meaning according
          # to the context of the parser. We don't expose that information in
          # prism, so we need to normalize these tokens a bit.
          case actual_token[0]
          when :kDO
            actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0])
          when :tLPAREN
            actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2
          when :tLCURLY
            actual_token[0] = expected_token[0] if %i[tLBRACE tLBRACE_ARG].include?(expected_token[0])
          when :tPOW
            actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR
          end
          # Now we can assert that the tokens are actually equal.
          assert_equal expected_token, actual_token, -> {
            "expected: #{expected_token.inspect}\n" \
            "actual: #{actual_token.inspect}"
          }
        end
      end
    end
    # Assert that both lists of comments are equal, reporting both lists in
    # full when they are not.
    def assert_equal_comments(expected_comments, actual_comments)
      failure_message = -> {
        "expected: #{expected_comments.inspect}\n" \
        "actual: #{actual_comments.inspect}"
      }
      assert_equal expected_comments, actual_comments, failure_message
    end
  end
 end