[ruby/prism] Split parse result based on type

https://github.com/ruby/prism/commit/17194e096d
2024-04-19 14:53:39 -04:00 · 2024-04-19 14:53:39 -04:00 · 23be6599a2
commit 23be6599a2
parent cb711df36f
7 changed files with 93 additions and 27 deletions
--- a/lib/prism.rb
+++ b/lib/prism.rb
@ -37,7 +37,7 @@ module Prism
  private_constant :LexRipper

  # :call-seq:
-  #   Prism::lex_compat(source, **options) -> ParseResult
+  #   Prism::lex_compat(source, **options) -> LexCompat::Result
  #
  # Returns a parse result whose value is an array of tokens that closely
  # resembles the return value of Ripper::lex. The main difference is that the
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@ -350,7 +350,7 @@ module Prism
        node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes
        tokens.each { |token,| token.value.force_encoding(loader.encoding) }

-        ParseResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
+        ParseLexResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
      end
    end

--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@ -10,6 +10,23 @@ module Prism
  # generally lines up. However, there are a few cases that require special
  # handling.
  class LexCompat # :nodoc:
+    # A result class specialized for holding tokens produced by the lexer.
+    class Result < Prism::Result
+      # The list of tokens that were produced by the lexer.
+      attr_reader :value
+
+      # Create a new lex compat result object with the given values.
+      def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+        @value = value
+        super(comments, magic_comments, data_loc, errors, warnings, source)
+      end
+
+      # Implement the hash pattern matching interface for Result.
+      def deconstruct_keys(keys)
+        super.merge!(value: value)
+      end
+    end
+
    # This is a mapping of prism token types to Ripper token types. This is a
    # many-to-one mapping because we split up our token types, whereas Ripper
    # tends to group them.
@ -844,7 +861,7 @@ module Prism
      # We sort by location to compare against Ripper's output
      tokens.sort_by!(&:location)

-      ParseResult.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.new(source))
+      Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.new(source))
    end
  end

--- a/lib/prism/parse_result.rb
+++ b/lib/prism/parse_result.rb
@ -438,14 +438,9 @@ module Prism
  end

  # This represents the result of a call to ::parse or ::parse_file. It contains
-  # the AST, any comments that were encounters, and any errors that were
-  # encountered.
-  class ParseResult
-    # The value that was generated by parsing. Normally this holds the AST, but
-    # it can sometimes how a list of tokens or other results passed back from
-    # the parser.
-    attr_reader :value
-
+  # the requested structure, any comments that were encountered, and any errors
+  # that were encountered.
+  class Result
    # The list of comments that were encountered during parsing.
    attr_reader :comments

@ -466,9 +461,8 @@ module Prism
    # A Source instance that represents the source code that was parsed.
    attr_reader :source

-    # Create a new parse result object with the given values.
-    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
-      @value = value
+    # Create a new result object with the given values.
+    def initialize(comments, magic_comments, data_loc, errors, warnings, source)
      @comments = comments
      @magic_comments = magic_comments
      @data_loc = data_loc
@ -477,9 +471,9 @@ module Prism
      @source = source
    end

-    # Implement the hash pattern matching interface for ParseResult.
+    # Implement the hash pattern matching interface for Result.
    def deconstruct_keys(keys)
-      { value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
+      { comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
    end

    # Returns the encoding of the source code that was parsed.
@ -500,6 +494,58 @@ module Prism
    end
  end

+  # This is a result specific to the `parse` and `parse_file` methods.
+  class ParseResult < Result
+    # The syntax tree that was parsed from the source code.
+    attr_reader :value
+
+    # Create a new parse result object with the given values.
+    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+      @value = value
+      super(comments, magic_comments, data_loc, errors, warnings, source)
+    end
+
+    # Implement the hash pattern matching interface for ParseResult.
+    def deconstruct_keys(keys)
+      super.merge!(value: value)
+    end
+  end
+
+  # This is a result specific to the `lex` and `lex_file` methods.
+  class LexResult < Result
+    # The list of tokens that were lexed from the source code.
+    attr_reader :value
+
+    # Create a new lex result object with the given values.
+    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+      @value = value
+      super(comments, magic_comments, data_loc, errors, warnings, source)
+    end
+
+    # Implement the hash pattern matching interface for LexResult.
+    def deconstruct_keys(keys)
+      super.merge!(value: value)
+    end
+  end
+
+  # This is a result specific to the `parse_lex` and `parse_lex_file` methods.
+  class ParseLexResult < Result
+    # A tuple of the syntax tree and the list of tokens that were parsed from
+    # the source code.
+    attr_reader :value
+
+    # Create a new parse lex result object with the given values.
+    def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
+      @value = value
+      super(comments, magic_comments, data_loc, errors, warnings, source)
+    end
+
+    # Implement the hash pattern matching interface for ParseLexResult.
+    def deconstruct_keys(keys)
+      super.merge!(value: value)
+    end
+  end
+
  # This represents a token from the Ruby source.
  class Token
    # The Source object that represents the source this token came from.
--- a/lib/prism/parse_result/newlines.rb
+++ b/lib/prism/parse_result/newlines.rb
@ -58,8 +58,6 @@ module Prism

    # Walk the tree and mark nodes that are on a new line.
    def mark_newlines!
-      value = self.value
-      raise "This method should only be called on a parse result that contains a node" unless Node === value
      value.accept(Newlines.new(Array.new(1 + source.offsets.size, false))) # steep:ignore
    end
  end
--- a/prism/extension.c
+++ b/prism/extension.c
@ -19,7 +19,9 @@ VALUE rb_cPrismEmbDocComment;
 VALUE rb_cPrismMagicComment;
 VALUE rb_cPrismParseError;
 VALUE rb_cPrismParseWarning;
+VALUE rb_cPrismResult;
 VALUE rb_cPrismParseResult;
+VALUE rb_cPrismParseLexResult;

 VALUE rb_cPrismDebugEncoding;

@ -515,7 +517,7 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
 * Create a new parse result from the given parser, value, encoding, and source.
 */
 static VALUE
-parse_result_create(pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
+parse_result_create(VALUE class, pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
    VALUE result_argv[] = {
        value,
        parser_comments(parser, source),
@ -526,7 +528,7 @@ parse_result_create(pm_parser_t *parser, VALUE value, rb_encoding *encoding, VAL
        source
    };

-    return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
+    return rb_class_new_instance(7, result_argv, class);
 }

 /******************************************************************************/
@ -635,7 +637,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
        value = parse_lex_data.tokens;
    }

-    VALUE result = parse_result_create(&parser, value, parse_lex_data.encoding, source);
+    VALUE result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source);
    pm_node_destroy(&parser, node);
    pm_parser_free(&parser);

@ -700,7 +702,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {

    VALUE source = pm_source_new(&parser, encoding);
    VALUE value = pm_ast_new(&parser, node, encoding, source);
-    VALUE result = parse_result_create(&parser, value, encoding, source) ;
+    VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source) ;

    pm_node_destroy(&parser, node);
    pm_parser_free(&parser);
@ -804,7 +806,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) {

    VALUE source = pm_source_new(&parser, encoding);
    VALUE value = pm_ast_new(&parser, node, encoding, source);
-    VALUE result = parse_result_create(&parser, value, encoding, source);
+    VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source);

    pm_node_destroy(&parser, node);
    pm_buffer_free(&buffer);
@ -1362,7 +1364,10 @@ Init_prism(void) {
    rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
    rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
    rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
-    rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
+
+    rb_cPrismResult = rb_define_class_under(rb_cPrism, "Result", rb_cObject);
+    rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cPrismResult);
+    rb_cPrismParseLexResult = rb_define_class_under(rb_cPrism, "ParseLexResult", rb_cPrismResult);

    // Intern all of the options that we support so that we don't have to do it
    // every time we parse.
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@ -143,7 +143,7 @@ module Prism
          length = load_varuint
          lex_state = load_varuint
          location = Location.new(@source, start, length)
-          tokens << [Prism::Token.new(source, type, location.slice, location), lex_state]
+          tokens << [Token.new(source, type, location.slice, location), lex_state]
        end

        tokens
@ -158,7 +158,7 @@ module Prism
        tokens.each { |token,| token.value.force_encoding(encoding) }

        raise "Expected to consume all bytes while deserializing" unless @io.eof?
-        Prism::ParseResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
+        LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
      end

      def load_nodes
@ -177,7 +177,7 @@ module Prism

      def load_result
        node, comments, magic_comments, data_loc, errors, warnings = load_nodes
-        Prism::ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
+        ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
      end

      private