[ruby/prism] Split parse result based on type

https://github.com/ruby/prism/commit/17194e096d
This commit is contained in:
Kevin Newton 2024-04-19 14:53:39 -04:00 committed by git
parent cb711df36f
commit 23be6599a2
7 changed files with 93 additions and 27 deletions

View File

@ -37,7 +37,7 @@ module Prism
private_constant :LexRipper
# :call-seq:
# Prism::lex_compat(source, **options) -> ParseResult
# Prism::lex_compat(source, **options) -> LexCompat::Result
#
# Returns a parse result whose value is an array of tokens that closely
# resembles the return value of Ripper::lex. The main difference is that the

View File

@ -350,7 +350,7 @@ module Prism
node, comments, magic_comments, data_loc, errors, warnings = loader.load_nodes
tokens.each { |token,| token.value.force_encoding(loader.encoding) }
ParseResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
ParseLexResult.new([node, tokens], comments, magic_comments, data_loc, errors, warnings, source)
end
end

View File

@ -10,6 +10,23 @@ module Prism
# generally lines up. However, there are a few cases that require special
# handling.
class LexCompat # :nodoc:
# A Result subclass whose value is the list of tokens produced by the lexer.
class Result < Prism::Result
  # The list of tokens that were produced by the lexer.
  attr_reader :value

  # Build a lex compat result: record the tokens, then pass every remaining
  # argument through to the superclass initializer unchanged.
  def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
    @value = value
    super(comments, magic_comments, data_loc, errors, warnings, source)
  end

  # Hash pattern matching interface for Result. Takes the hash built by the
  # superclass and adds the :value entry to it.
  def deconstruct_keys(keys)
    deconstructed = super
    deconstructed[:value] = value
    deconstructed
  end
end
# This is a mapping of prism token types to Ripper token types. This is a
# many-to-one mapping because we split up our token types, whereas Ripper
# tends to group them.
@ -844,7 +861,7 @@ module Prism
# We sort by location to compare against Ripper's output
tokens.sort_by!(&:location)
ParseResult.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.new(source))
Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, Source.new(source))
end
end

View File

@ -438,14 +438,9 @@ module Prism
end
# This represents the result of a call to ::parse or ::parse_file. It contains
# the AST, any comments that were encounters, and any errors that were
# encountered.
class ParseResult
# The value that was generated by parsing. Normally this holds the AST, but
# it can sometimes how a list of tokens or other results passed back from
# the parser.
attr_reader :value
# the requested structure, any comments that were encountered, and any errors
# that were encountered.
class Result
# The list of comments that were encountered during parsing.
attr_reader :comments
@ -466,9 +461,8 @@ module Prism
# A Source instance that represents the source code that was parsed.
attr_reader :source
# Create a new parse result object with the given values.
def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
@value = value
# Create a new result object with the given values.
def initialize(comments, magic_comments, data_loc, errors, warnings, source)
@comments = comments
@magic_comments = magic_comments
@data_loc = data_loc
@ -477,9 +471,9 @@ module Prism
@source = source
end
# Implement the hash pattern matching interface for ParseResult.
# Implement the hash pattern matching interface for Result.
def deconstruct_keys(keys)
{ value: value, comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
{ comments: comments, magic_comments: magic_comments, data_loc: data_loc, errors: errors, warnings: warnings }
end
# Returns the encoding of the source code that was parsed.
@ -500,6 +494,58 @@ module Prism
end
end
# The result type returned by the `parse` and `parse_file` methods.
class ParseResult < Result
  # The syntax tree that was parsed from the source code.
  attr_reader :value

  # Build a parse result: record the syntax tree, then pass every remaining
  # argument through to the superclass initializer unchanged.
  def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
    @value = value
    super(comments, magic_comments, data_loc, errors, warnings, source)
  end

  # Hash pattern matching interface for ParseResult. Takes the hash built by
  # the superclass and adds the :value entry to it.
  def deconstruct_keys(keys)
    deconstructed = super
    deconstructed[:value] = value
    deconstructed
  end
end
# The result type returned by the `lex` and `lex_file` methods.
class LexResult < Result
  # The list of tokens that were parsed from the source code.
  attr_reader :value

  # Build a lex result: record the tokens, then pass every remaining argument
  # through to the superclass initializer unchanged.
  def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
    @value = value
    super(comments, magic_comments, data_loc, errors, warnings, source)
  end

  # Hash pattern matching interface for LexResult. Takes the hash built by the
  # superclass and adds the :value entry to it.
  def deconstruct_keys(keys)
    deconstructed = super
    deconstructed[:value] = value
    deconstructed
  end
end
# The result type returned by the `parse_lex` and `parse_lex_file` methods.
class ParseLexResult < Result
  # A tuple of the syntax tree and the list of tokens that were parsed from
  # the source code.
  attr_reader :value

  # Build a parse lex result: record the [syntax tree, tokens] tuple, then
  # pass every remaining argument through to the superclass initializer
  # unchanged.
  def initialize(value, comments, magic_comments, data_loc, errors, warnings, source)
    @value = value
    super(comments, magic_comments, data_loc, errors, warnings, source)
  end

  # Hash pattern matching interface for ParseLexResult. Takes the hash built
  # by the superclass and adds the :value entry to it.
  def deconstruct_keys(keys)
    deconstructed = super
    deconstructed[:value] = value
    deconstructed
  end
end
# This represents a token from the Ruby source.
class Token
# The Source object that represents the source this token came from.

View File

@ -58,8 +58,6 @@ module Prism
# Walk the tree and mark nodes that are on a new line.
def mark_newlines!
value = self.value
raise "This method should only be called on a parse result that contains a node" unless Node === value
value.accept(Newlines.new(Array.new(1 + source.offsets.size, false))) # steep:ignore
end
end

View File

@ -19,7 +19,9 @@ VALUE rb_cPrismEmbDocComment;
VALUE rb_cPrismMagicComment;
VALUE rb_cPrismParseError;
VALUE rb_cPrismParseWarning;
VALUE rb_cPrismResult;
VALUE rb_cPrismParseResult;
VALUE rb_cPrismParseLexResult;
VALUE rb_cPrismDebugEncoding;
@ -515,7 +517,7 @@ parser_warnings(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
* Create a new parse result from the given parser, value, encoding, and source.
*/
static VALUE
parse_result_create(pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
parse_result_create(VALUE class, pm_parser_t *parser, VALUE value, rb_encoding *encoding, VALUE source) {
VALUE result_argv[] = {
value,
parser_comments(parser, source),
@ -526,7 +528,7 @@ parse_result_create(pm_parser_t *parser, VALUE value, rb_encoding *encoding, VAL
source
};
return rb_class_new_instance(7, result_argv, rb_cPrismParseResult);
return rb_class_new_instance(7, result_argv, class);
}
/******************************************************************************/
@ -635,7 +637,7 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod
value = parse_lex_data.tokens;
}
VALUE result = parse_result_create(&parser, value, parse_lex_data.encoding, source);
VALUE result = parse_result_create(rb_cPrismParseLexResult, &parser, value, parse_lex_data.encoding, source);
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@ -700,7 +702,7 @@ parse_input(pm_string_t *input, const pm_options_t *options) {
VALUE source = pm_source_new(&parser, encoding);
VALUE value = pm_ast_new(&parser, node, encoding, source);
VALUE result = parse_result_create(&parser, value, encoding, source) ;
VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source) ;
pm_node_destroy(&parser, node);
pm_parser_free(&parser);
@ -804,7 +806,7 @@ parse_stream(int argc, VALUE *argv, VALUE self) {
VALUE source = pm_source_new(&parser, encoding);
VALUE value = pm_ast_new(&parser, node, encoding, source);
VALUE result = parse_result_create(&parser, value, encoding, source);
VALUE result = parse_result_create(rb_cPrismParseResult, &parser, value, encoding, source);
pm_node_destroy(&parser, node);
pm_buffer_free(&buffer);
@ -1362,7 +1364,10 @@ Init_prism(void) {
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);
rb_cPrismResult = rb_define_class_under(rb_cPrism, "Result", rb_cObject);
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cPrismResult);
rb_cPrismParseLexResult = rb_define_class_under(rb_cPrism, "ParseLexResult", rb_cPrismResult);
// Intern all of the options that we support so that we don't have to do it
// every time we parse.

View File

@ -143,7 +143,7 @@ module Prism
length = load_varuint
lex_state = load_varuint
location = Location.new(@source, start, length)
tokens << [Prism::Token.new(source, type, location.slice, location), lex_state]
tokens << [Token.new(source, type, location.slice, location), lex_state]
end
tokens
@ -158,7 +158,7 @@ module Prism
tokens.each { |token,| token.value.force_encoding(encoding) }
raise "Expected to consume all bytes while deserializing" unless @io.eof?
Prism::ParseResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
LexResult.new(tokens, comments, magic_comments, data_loc, errors, warnings, @source)
end
def load_nodes
@ -177,7 +177,7 @@ module Prism
def load_result
node, comments, magic_comments, data_loc, errors, warnings = load_nodes
Prism::ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
ParseResult.new(node, comments, magic_comments, data_loc, errors, warnings, @source)
end
private