[ruby/prism] Add parser translation
https://github.com/ruby/prism/commit/8cdec8070c
This commit is contained in:
parent
223910b329
commit
f12ebe1188
@ -26,6 +26,7 @@ module Prism
|
||||
autoload :Pack, "prism/pack"
|
||||
autoload :Pattern, "prism/pattern"
|
||||
autoload :Serialize, "prism/serialize"
|
||||
autoload :Translation, "prism/translation"
|
||||
autoload :Visitor, "prism/visitor"
|
||||
|
||||
# Some of these constants are not meant to be exposed, so marking them as
|
||||
|
@ -81,7 +81,7 @@ module Prism
|
||||
class RationalNode < Node
  # Returns the value of the node as a Ruby Rational.
  #
  # When the numerator is an integer written in a non-decimal base (binary,
  # octal, hex), Kernel#Rational cannot parse the raw source slice, so we use
  # the already-parsed integer value instead. Otherwise (decimal integers and
  # floats) we hand the source slice, minus its trailing "r" suffix, to
  # Kernel#Rational.
  #
  # NOTE(review): the scraped diff showed two candidate implementations of
  # this line (with and without the decimal? guard); the guarded form is kept
  # here as it also handles non-decimal integer literals — confirm against
  # the upstream commit.
  def value
    Rational(numeric.is_a?(IntegerNode) && !numeric.decimal? ? numeric.value : slice.chomp("r"))
  end
end
|
||||
|
||||
|
@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
|
||||
"docs/javascript.md",
|
||||
"docs/local_variable_depth.md",
|
||||
"docs/mapping.md",
|
||||
"docs/parser_translation.md",
|
||||
"docs/parsing_rules.md",
|
||||
"docs/releasing.md",
|
||||
"docs/ripper.md",
|
||||
@ -74,16 +75,21 @@ Gem::Specification.new do |spec|
|
||||
"lib/prism/ffi.rb",
|
||||
"lib/prism/lex_compat.rb",
|
||||
"lib/prism/mutation_compiler.rb",
|
||||
"lib/prism/node.rb",
|
||||
"lib/prism/node_ext.rb",
|
||||
"lib/prism/node_inspector.rb",
|
||||
"lib/prism/node.rb",
|
||||
"lib/prism/pack.rb",
|
||||
"lib/prism/parse_result.rb",
|
||||
"lib/prism/parse_result/comments.rb",
|
||||
"lib/prism/parse_result/newlines.rb",
|
||||
"lib/prism/pattern.rb",
|
||||
"lib/prism/ripper_compat.rb",
|
||||
"lib/prism/serialize.rb",
|
||||
"lib/prism/parse_result/comments.rb",
|
||||
"lib/prism/parse_result/newlines.rb",
|
||||
"lib/prism/translation.rb",
|
||||
"lib/prism/translation/parser.rb",
|
||||
"lib/prism/translation/parser/compiler.rb",
|
||||
"lib/prism/translation/parser/lexer.rb",
|
||||
"lib/prism/translation/parser/rubocop.rb",
|
||||
"lib/prism/visitor.rb",
|
||||
"src/diagnostic.c",
|
||||
"src/encoding.c",
|
||||
|
11
lib/prism/translation.rb
Normal file
11
lib/prism/translation.rb
Normal file
@ -0,0 +1,11 @@
|
||||
# frozen_string_literal: true

module Prism
  # This module is responsible for converting the prism syntax tree into other
  # syntax trees. At the moment it only supports converting to the
  # whitequark/parser gem's syntax tree, but support is planned for the
  # seattlerb/ruby_parser gem's syntax tree as well.
  module Translation
    # Lazily load the parser translator so that requiring prism does not pull
    # in the whitequark/parser gem unless it is actually used.
    autoload :Parser, "prism/translation/parser"
  end
end
|
136
lib/prism/translation/parser.rb
Normal file
136
lib/prism/translation/parser.rb
Normal file
@ -0,0 +1,136 @@
|
||||
# frozen_string_literal: true

require "parser"

module Prism
  module Translation
    # This class is the entry-point for converting a prism syntax tree into the
    # whitequark/parser gem's syntax tree. It inherits from the base parser for
    # the parser gem, and overrides the parse* methods to parse with prism and
    # then translate.
    class Parser < ::Parser::Base
      Racc_debug_parser = false # :nodoc:

      def version # :nodoc:
        33
      end

      # The default encoding for Ruby files is UTF-8.
      def default_encoding
        Encoding::UTF_8
      end

      def yyerror # :nodoc:
      end

      # Parses a source buffer and returns the AST.
      def parse(source_buffer)
        @source_buffer = source_buffer
        source = source_buffer.source

        build_ast(
          Prism.parse(source, filepath: source_buffer.name).value,
          build_offset_cache(source)
        )
      ensure
        @source_buffer = nil
      end

      # Parses a source buffer and returns the AST and the source code comments.
      def parse_with_comments(source_buffer)
        @source_buffer = source_buffer
        source = source_buffer.source

        offset_cache = build_offset_cache(source)
        result = Prism.parse(source, filepath: source_buffer.name)

        [
          build_ast(result.value, offset_cache),
          build_comments(result.comments, offset_cache)
        ]
      ensure
        @source_buffer = nil
      end

      # Parses a source buffer and returns the AST, the source code comments,
      # and the tokens emitted by the lexer.
      def tokenize(source_buffer, _recover = false)
        @source_buffer = source_buffer
        source = source_buffer.source

        offset_cache = build_offset_cache(source)
        result = Prism.parse_lex(source, filepath: source_buffer.name)
        program, tokens = result.value

        [
          build_ast(program, offset_cache),
          build_comments(result.comments, offset_cache),
          build_tokens(tokens, offset_cache)
        ]
      ensure
        @source_buffer = nil
      end

      # Since prism resolves num params for us, we don't need to support this
      # kind of logic here.
      def try_declare_numparam(node)
        node.children[0].match?(/\A_[1-9]\z/)
      end

      private

      # Prism deals with offsets in bytes, while the parser gem deals with
      # offsets in characters. We need to handle this conversion in order to
      # build the parser gem AST.
      #
      # If the bytesize of the source is the same as the length, then we can
      # just use the offset directly. Otherwise, we build a hash that functions
      # as a cache for the conversion.
      #
      # This is a good opportunity for some optimizations. If the source file
      # has any multi-byte characters, this can tank the performance of the
      # translator. We could make this significantly faster by using a
      # different data structure for the cache.
      def build_offset_cache(source)
        if source.bytesize == source.length
          -> (offset) { offset }
        else
          Hash.new do |hash, offset|
            hash[offset] = source.byteslice(0, offset).length
          end
        end
      end

      # Build the parser gem AST from the prism AST.
      def build_ast(program, offset_cache)
        program.accept(Compiler.new(self, offset_cache))
      end

      # Build the parser gem comments from the prism comments.
      def build_comments(comments, offset_cache)
        comments.map do |comment|
          location = comment.location

          ::Parser::Source::Comment.new(
            ::Parser::Source::Range.new(
              source_buffer,
              offset_cache[location.start_offset],
              offset_cache[location.end_offset]
            )
          )
        end
      end

      # Build the parser gem tokens from the prism tokens.
      def build_tokens(tokens, offset_cache)
        Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a
      end

      require_relative "parser/compiler"
      require_relative "parser/lexer"

      private_constant :Compiler
      private_constant :Lexer
    end
  end
end
|
1797
lib/prism/translation/parser/compiler.rb
Normal file
1797
lib/prism/translation/parser/compiler.rb
Normal file
File diff suppressed because it is too large
Load Diff
335
lib/prism/translation/parser/lexer.rb
Normal file
335
lib/prism/translation/parser/lexer.rb
Normal file
@ -0,0 +1,335 @@
|
||||
# frozen_string_literal: true

module Prism
  module Translation
    class Parser
      # Accepts a list of prism tokens and converts them into the expected
      # format for the parser gem.
      class Lexer
        # The direct translating of types between the two lexers.
        TYPES = {
          # These tokens should never appear in the output of the lexer.
          EOF: nil,
          MISSING: nil,
          NOT_PROVIDED: nil,
          IGNORED_NEWLINE: nil,
          EMBDOC_END: nil,
          EMBDOC_LINE: nil,
          __END__: nil,

          # These tokens have more or less direct mappings.
          AMPERSAND: :tAMPER2,
          AMPERSAND_AMPERSAND: :tANDOP,
          AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
          AMPERSAND_DOT: :tANDDOT,
          AMPERSAND_EQUAL: :tOP_ASGN,
          BACK_REFERENCE: :tBACK_REF,
          BACKTICK: :tXSTRING_BEG,
          BANG: :tBANG,
          BANG_EQUAL: :tNEQ,
          BANG_TILDE: :tNMATCH,
          BRACE_LEFT: :tLCURLY,
          BRACE_RIGHT: :tRCURLY,
          BRACKET_LEFT: :tLBRACK2,
          BRACKET_LEFT_ARRAY: :tLBRACK,
          BRACKET_LEFT_RIGHT: :tAREF,
          BRACKET_LEFT_RIGHT_EQUAL: :tASET,
          BRACKET_RIGHT: :tRBRACK,
          CARET: :tCARET,
          CARET_EQUAL: :tOP_ASGN,
          CHARACTER_LITERAL: :tCHARACTER,
          CLASS_VARIABLE: :tCVAR,
          COLON: :tCOLON,
          COLON_COLON: :tCOLON2,
          COMMA: :tCOMMA,
          COMMENT: :tCOMMENT,
          CONSTANT: :tCONSTANT,
          DOT: :tDOT,
          DOT_DOT: :tDOT2,
          DOT_DOT_DOT: :tDOT3,
          EMBDOC_BEGIN: :tCOMMENT,
          EMBEXPR_BEGIN: :tSTRING_DBEG,
          EMBEXPR_END: :tSTRING_DEND,
          EMBVAR: :tSTRING_DVAR,
          EQUAL: :tEQL,
          EQUAL_EQUAL: :tEQ,
          EQUAL_EQUAL_EQUAL: :tEQQ,
          EQUAL_GREATER: :tASSOC,
          EQUAL_TILDE: :tMATCH,
          FLOAT: :tFLOAT,
          FLOAT_IMAGINARY: :tIMAGINARY,
          FLOAT_RATIONAL: :tRATIONAL,
          FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
          GLOBAL_VARIABLE: :tGVAR,
          GREATER: :tGT,
          GREATER_EQUAL: :tGEQ,
          GREATER_GREATER: :tRSHFT,
          GREATER_GREATER_EQUAL: :tOP_ASGN,
          HEREDOC_START: :tSTRING_BEG,
          HEREDOC_END: :tSTRING_END,
          IDENTIFIER: :tIDENTIFIER,
          INSTANCE_VARIABLE: :tIVAR,
          INTEGER: :tINTEGER,
          INTEGER_IMAGINARY: :tIMAGINARY,
          INTEGER_RATIONAL: :tRATIONAL,
          INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
          KEYWORD_ALIAS: :kALIAS,
          KEYWORD_AND: :kAND,
          KEYWORD_BEGIN: :kBEGIN,
          KEYWORD_BEGIN_UPCASE: :klBEGIN,
          KEYWORD_BREAK: :kBREAK,
          KEYWORD_CASE: :kCASE,
          KEYWORD_CLASS: :kCLASS,
          KEYWORD_DEF: :kDEF,
          KEYWORD_DEFINED: :kDEFINED,
          KEYWORD_DO: :kDO,
          KEYWORD_DO_LOOP: :kDO_COND,
          KEYWORD_END: :kEND,
          KEYWORD_END_UPCASE: :klEND,
          KEYWORD_ENSURE: :kENSURE,
          KEYWORD_ELSE: :kELSE,
          KEYWORD_ELSIF: :kELSIF,
          KEYWORD_FALSE: :kFALSE,
          KEYWORD_FOR: :kFOR,
          KEYWORD_IF: :kIF,
          KEYWORD_IF_MODIFIER: :kIF_MOD,
          KEYWORD_IN: :kIN,
          KEYWORD_MODULE: :kMODULE,
          KEYWORD_NEXT: :kNEXT,
          KEYWORD_NIL: :kNIL,
          KEYWORD_NOT: :kNOT,
          KEYWORD_OR: :kOR,
          KEYWORD_REDO: :kREDO,
          KEYWORD_RESCUE: :kRESCUE,
          KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
          KEYWORD_RETRY: :kRETRY,
          KEYWORD_RETURN: :kRETURN,
          KEYWORD_SELF: :kSELF,
          KEYWORD_SUPER: :kSUPER,
          KEYWORD_THEN: :kTHEN,
          KEYWORD_TRUE: :kTRUE,
          KEYWORD_UNDEF: :kUNDEF,
          KEYWORD_UNLESS: :kUNLESS,
          KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
          KEYWORD_UNTIL: :kUNTIL,
          KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
          KEYWORD_WHEN: :kWHEN,
          KEYWORD_WHILE: :kWHILE,
          KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
          KEYWORD_YIELD: :kYIELD,
          KEYWORD___ENCODING__: :k__ENCODING__,
          KEYWORD___FILE__: :k__FILE__,
          KEYWORD___LINE__: :k__LINE__,
          LABEL: :tLABEL,
          LABEL_END: :tLABEL_END,
          LAMBDA_BEGIN: :tLAMBEG,
          LESS: :tLT,
          LESS_EQUAL: :tLEQ,
          LESS_EQUAL_GREATER: :tCMP,
          LESS_LESS: :tLSHFT,
          LESS_LESS_EQUAL: :tOP_ASGN,
          METHOD_NAME: :tFID,
          MINUS: :tMINUS,
          MINUS_EQUAL: :tOP_ASGN,
          MINUS_GREATER: :tLAMBDA,
          NEWLINE: :tNL,
          NUMBERED_REFERENCE: :tNTH_REF,
          PARENTHESIS_LEFT: :tLPAREN,
          PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
          PARENTHESIS_RIGHT: :tRPAREN,
          PERCENT: :tPERCENT,
          PERCENT_EQUAL: :tOP_ASGN,
          PERCENT_LOWER_I: :tQSYMBOLS_BEG,
          PERCENT_LOWER_W: :tQWORDS_BEG,
          PERCENT_UPPER_I: :tSYMBOLS_BEG,
          PERCENT_UPPER_W: :tWORDS_BEG,
          PERCENT_LOWER_X: :tXSTRING_BEG,
          PLUS: :tPLUS,
          PLUS_EQUAL: :tOP_ASGN,
          PIPE_EQUAL: :tOP_ASGN,
          PIPE: :tPIPE,
          PIPE_PIPE: :tOROP,
          PIPE_PIPE_EQUAL: :tOP_ASGN,
          QUESTION_MARK: :tEH,
          REGEXP_BEGIN: :tREGEXP_BEG,
          REGEXP_END: :tSTRING_END,
          SEMICOLON: :tSEMI,
          SLASH: :tDIVIDE,
          SLASH_EQUAL: :tOP_ASGN,
          STAR: :tSTAR2,
          STAR_EQUAL: :tOP_ASGN,
          STAR_STAR: :tPOW,
          STAR_STAR_EQUAL: :tOP_ASGN,
          STRING_BEGIN: :tSTRING_BEG,
          STRING_CONTENT: :tSTRING_CONTENT,
          STRING_END: :tSTRING_END,
          SYMBOL_BEGIN: :tSYMBEG,
          TILDE: :tTILDE,
          UAMPERSAND: :tAMPER,
          UCOLON_COLON: :tCOLON3,
          UDOT_DOT: :tDOT2,
          UDOT_DOT_DOT: :tBDOT3,
          UMINUS: :tUMINUS,
          UMINUS_NUM: :tUNARY_NUM,
          UPLUS: :tUPLUS,
          USTAR: :tSTAR,
          USTAR_STAR: :tPOW,
          WORDS_SEP: :tSPACE
        }

        private_constant :TYPES

        # The Parser::Source::Buffer that the tokens were lexed from.
        attr_reader :source_buffer

        # An array of prism tokens that we lexed.
        attr_reader :lexed

        # A hash that maps offsets in bytes to offsets in characters.
        attr_reader :offset_cache

        # Initialize the lexer with the given source buffer, prism tokens, and
        # offset cache.
        def initialize(source_buffer, lexed, offset_cache)
          @source_buffer = source_buffer
          @lexed = lexed
          @offset_cache = offset_cache
        end

        Range = ::Parser::Source::Range # :nodoc:
        private_constant :Range

        # Convert the prism tokens into the expected format for the parser gem.
        def to_a
          tokens = []
          index = 0

          while index < lexed.length
            token, = lexed[index]
            index += 1
            next if token.type == :IGNORED_NEWLINE || token.type == :EOF

            type = TYPES.fetch(token.type)
            value = token.value
            location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])

            case type
            when :tCHARACTER
              value.delete_prefix!("?")
            when :tCOMMENT
              if token.type == :EMBDOC_BEGIN
                # Consume embdoc lines until the closing =end so the whole
                # embdoc is emitted as a single comment token.
                until (next_token = lexed[index]) && next_token.type == :EMBDOC_END
                  value += next_token.value
                  index += 1
                end

                value += next_token.value
                location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index].location.end_offset])
                index += 1
              else
                # The parser gem does not include the trailing newline in a
                # regular comment's value or range.
                value.chomp!
                location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
              end
            when :tNL
              value = nil
            when :tFLOAT
              value = Float(value)
            when :tIMAGINARY
              value = parse_complex(value)
            when :tINTEGER
              # The parser gem splits a leading "+" off into its own
              # tUNARY_NUM token.
              if value.start_with?("+")
                tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
                location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
              end

              value = Integer(value)
            when :tLABEL
              value.chomp!(":")
            when :tLABEL_END
              value.chomp!(":")
            when :tNTH_REF
              value = Integer(value.delete_prefix("$"))
            when :tOP_ASGN
              value.chomp!("=")
            when :tRATIONAL
              value = parse_rational(value)
            when :tSPACE
              value = nil
            when :tSTRING_BEG
              # The parser gem collapses simple single/double-quoted strings
              # (empty or single-content) into a single tSTRING token.
              if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
                next_location = token.location.join(next_token.location)
                type = :tSTRING
                value = ""
                location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
                index += 1
              elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
                next_location = token.location.join(next_next_token.location)
                type = :tSTRING
                value = next_token.value
                location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
                index += 2
              elsif value.start_with?("<<")
                # Normalize heredoc openers to <<" or the explicit quote.
                quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
                value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
              end
            when :tSTRING_DVAR
              value = nil
            when :tSTRING_END
              # A regexp end token carries the flags too; the tSTRING_END
              # portion is only the closing delimiter character.
              if token.type == :REGEXP_END
                value = value[0]
                location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
              end
            when :tSYMBEG
              # Plain symbols are collapsed into a single tSYMBOL token.
              if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
                next_location = token.location.join(next_token.location)
                type = :tSYMBOL
                value = next_token.value
                value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
                location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
                index += 1
              end
            when :tFID
              # A method name directly after `def` is a plain identifier.
              if tokens[-1][0] == :kDEF
                type = :tIDENTIFIER
              end
            end

            tokens << [type, [value, location]]

            # Emit the regexp flags as their own tREGEXP_OPT token.
            if token.type == :REGEXP_END
              tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
            end
          end

          tokens
        end

        private

        # Parse a complex from the string representation.
        def parse_complex(value)
          value.chomp!("i")

          if value.end_with?("r")
            Complex(0, parse_rational(value))
          elsif value.start_with?(/0[BbOoDdXx]/)
            Complex(0, Integer(value))
          else
            Complex(0, value)
          end
        end

        # Parse a rational from the string representation.
        def parse_rational(value)
          value.chomp!("r")

          if value.start_with?(/0[BbOoDdXx]/)
            Rational(Integer(value))
          else
            Rational(value)
          end
        end
      end
    end
  end
end
|
37
lib/prism/translation/parser/rubocop.rb
Normal file
37
lib/prism/translation/parser/rubocop.rb
Normal file
@ -0,0 +1,37 @@
|
||||
# frozen_string_literal: true

require "parser"
require "rubocop"

require "prism"
require "prism/translation/parser"

module Prism
  module Translation
    class Parser
      # This is the special version number that should be used in rubocop
      # configuration files to trigger using prism.
      VERSION_3_3 = 80_82_73_83_77.33

      # This module gets prepended into RuboCop::AST::ProcessedSource.
      module ProcessedSource
        # Redefine parser_class so that we can inject the prism parser into the
        # list of known parsers.
        def parser_class(ruby_version)
          if ruby_version == Prism::Translation::Parser::VERSION_3_3
            require "prism/translation/parser"
            Prism::Translation::Parser
          else
            super
          end
        end
      end
    end
  end
end

# :stopdoc:
RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)
known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES)
RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES)
RuboCop::TargetRuby::KNOWN_RUBIES = [*known_rubies, Prism::Translation::Parser::VERSION_3_3].freeze
|
@ -7,7 +7,7 @@ return unless defined?(RubyVM::InstructionSequence)
|
||||
module Prism
|
||||
class NewlineTest < TestCase
|
||||
base = File.expand_path("../", __FILE__)
|
||||
filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb unescape_test.rb]
|
||||
filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb parser_test.rb unescape_test.rb]
|
||||
|
||||
filepaths.each do |relative|
|
||||
define_method("test_newline_flags_#{relative}") do
|
||||
|
188
test/prism/parser_test.rb
Normal file
188
test/prism/parser_test.rb
Normal file
@ -0,0 +1,188 @@
|
||||
# frozen_string_literal: true

require_relative "test_helper"

begin
  require "parser/current"
rescue LoadError
  # In CRuby's CI, we're not going to test against the parser gem because we
  # don't want to have to install it. So in this case we'll just skip this test.
  return
end

# First, opt in to every AST feature.
Parser::Builders::Default.modernize

# Modify the source map == check so that it doesn't check against the node
# itself so we don't get into a recursive loop.
Parser::Source::Map.prepend(
  Module.new {
    def ==(other)
      self.class == other.class &&
        (instance_variables - %i[@node]).map do |ivar|
          instance_variable_get(ivar) == other.instance_variable_get(ivar)
        end.reduce(:&)
    end
  }
)

# Next, ensure that we're comparing the nodes and also comparing the source
# ranges so that we're getting all of the necessary information.
Parser::AST::Node.prepend(
  Module.new {
    def ==(other)
      super && (location == other.location)
    end
  }
)

module Prism
  class ParserTest < TestCase
    base = File.join(__dir__, "fixtures")

    # These files are either failing to parse or failing to translate, so we'll
    # skip them for now.
    skip_all = %w[
      arrays.txt
      constants.txt
      dash_heredocs.txt
      dos_endings.txt
      embdoc_no_newline_at_end.txt
      heredocs_with_ignored_newlines.txt
      regex.txt
      spanning_heredoc.txt
      spanning_heredoc_newlines.txt
      tilde_heredocs.txt
      unescaping.txt
    ]

    # Not sure why these files are failing on JRuby, but skipping them for now.
    if RUBY_ENGINE == "jruby"
      skip_all.push("emoji_method_calls.txt", "symbols.txt")
    end

    # These files are failing to translate their lexer output into the lexer
    # output expected by the parser gem, so we'll skip them for now.
    skip_tokens = %w[
      comments.txt
      endless_range_in_conditional.txt
      heredoc_with_comment.txt
      heredoc_with_escaped_newline_at_start.txt
      heredocs_leading_whitespace.txt
      heredocs_nested.txt
      heredocs_with_ignored_newlines_and_non_empty.txt
      indented_file_end.txt
      non_alphanumeric_methods.txt
      range_begin_open_inclusive.txt
      single_quote_heredocs.txt
      strings.txt
      xstring.txt
    ]

    Dir["*.txt", base: base].each do |name|
      next if skip_all.include?(name)

      define_method("test_#{name}") do
        assert_equal_parses(File.join(base, name), compare_tokens: !skip_tokens.include?(name))
      end
    end

    private

    # Parse the given fixture with both the parser gem and the prism
    # translation layer and assert that the ASTs, tokens, and comments match.
    def assert_equal_parses(filepath, compare_tokens: true)
      buffer = Parser::Source::Buffer.new(filepath, 1)
      buffer.source = File.read(filepath)

      parser = Parser::CurrentRuby.default_parser
      parser.diagnostics.consumer = ->(*) {}
      parser.diagnostics.all_errors_are_fatal = true

      expected_ast, expected_comments, expected_tokens =
        begin
          parser.tokenize(buffer)
        rescue ArgumentError, Parser::SyntaxError
          # If the parser gem itself cannot handle the fixture, there is
          # nothing to compare against, so bail out of the assertion.
          return
        end

      actual_ast, actual_comments, actual_tokens =
        Prism::Translation::Parser.new.tokenize(buffer)

      assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
      assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens
      assert_equal_comments(expected_comments, actual_comments)
    end

    # Walk both ASTs breadth-first and report the first node where the type,
    # location, or string content differs.
    def assert_equal_asts_message(expected_ast, actual_ast)
      queue = [[expected_ast, actual_ast]]

      while (left, right = queue.shift)
        if left.type != right.type
          return "expected: #{left.type}\nactual: #{right.type}"
        end

        if left.location != right.location
          return "expected:\n#{left.inspect}\n#{left.location}\nactual:\n#{right.inspect}\n#{right.location}"
        end

        if left.type == :str && left.children[0] != right.children[0]
          return "expected: #{left.inspect}\nactual: #{right.inspect}"
        end

        left.children.zip(right.children).each do |left_child, right_child|
          queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
        end
      end

      "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
    end

    def assert_equal_tokens(expected_tokens, actual_tokens)
      if expected_tokens != actual_tokens
        expected_index = 0
        actual_index = 0

        while expected_index < expected_tokens.length
          expected_token = expected_tokens[expected_index]
          actual_token = actual_tokens[actual_index]

          expected_index += 1
          actual_index += 1

          # The parser gem always has a space before a string end in list
          # literals, but we don't. So we'll skip over the space.
          if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END
            expected_index += 1
            next
          end

          # There are a lot of tokens that have very specific meaning according
          # to the context of the parser. We don't expose that information in
          # prism, so we need to normalize these tokens a bit.
          case actual_token[0]
          when :kDO
            actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0])
          when :tLPAREN
            actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2
          when :tLCURLY
            actual_token[0] = expected_token[0] if %i[tLBRACE tLBRACE_ARG].include?(expected_token[0])
          when :tPOW
            actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR
          end

          # Now we can assert that the tokens are actually equal.
          assert_equal expected_token, actual_token, -> {
            "expected: #{expected_token.inspect}\n" \
            "actual: #{actual_token.inspect}"
          }
        end
      end
    end

    def assert_equal_comments(expected_comments, actual_comments)
      assert_equal expected_comments, actual_comments, -> {
        "expected: #{expected_comments.inspect}\n" \
        "actual: #{actual_comments.inspect}"
      }
    end
  end
end
|
Loading…
x
Reference in New Issue
Block a user