[ruby/prism] Add parser translation
https://github.com/ruby/prism/commit/8cdec8070c
This commit is contained in:
parent
223910b329
commit
f12ebe1188
@ -26,6 +26,7 @@ module Prism
|
|||||||
autoload :Pack, "prism/pack"
|
autoload :Pack, "prism/pack"
|
||||||
autoload :Pattern, "prism/pattern"
|
autoload :Pattern, "prism/pattern"
|
||||||
autoload :Serialize, "prism/serialize"
|
autoload :Serialize, "prism/serialize"
|
||||||
|
autoload :Translation, "prism/translation"
|
||||||
autoload :Visitor, "prism/visitor"
|
autoload :Visitor, "prism/visitor"
|
||||||
|
|
||||||
# Some of these constants are not meant to be exposed, so marking them as
|
# Some of these constants are not meant to be exposed, so marking them as
|
||||||
|
@ -81,7 +81,7 @@ module Prism
|
|||||||
class RationalNode < Node
|
class RationalNode < Node
|
||||||
# Returns the value of the node as a Ruby Rational.
|
# Returns the value of the node as a Ruby Rational.
|
||||||
def value
|
def value
|
||||||
Rational(numeric.is_a?(IntegerNode) && !numeric.decimal? ? numeric.value : slice.chomp("r"))
|
Rational(numeric.is_a?(IntegerNode) ? numeric.value : slice.chomp("r"))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@ Gem::Specification.new do |spec|
|
|||||||
"docs/javascript.md",
|
"docs/javascript.md",
|
||||||
"docs/local_variable_depth.md",
|
"docs/local_variable_depth.md",
|
||||||
"docs/mapping.md",
|
"docs/mapping.md",
|
||||||
|
"docs/parser_translation.md",
|
||||||
"docs/parsing_rules.md",
|
"docs/parsing_rules.md",
|
||||||
"docs/releasing.md",
|
"docs/releasing.md",
|
||||||
"docs/ripper.md",
|
"docs/ripper.md",
|
||||||
@ -74,16 +75,21 @@ Gem::Specification.new do |spec|
|
|||||||
"lib/prism/ffi.rb",
|
"lib/prism/ffi.rb",
|
||||||
"lib/prism/lex_compat.rb",
|
"lib/prism/lex_compat.rb",
|
||||||
"lib/prism/mutation_compiler.rb",
|
"lib/prism/mutation_compiler.rb",
|
||||||
"lib/prism/node.rb",
|
|
||||||
"lib/prism/node_ext.rb",
|
"lib/prism/node_ext.rb",
|
||||||
"lib/prism/node_inspector.rb",
|
"lib/prism/node_inspector.rb",
|
||||||
|
"lib/prism/node.rb",
|
||||||
"lib/prism/pack.rb",
|
"lib/prism/pack.rb",
|
||||||
"lib/prism/parse_result.rb",
|
"lib/prism/parse_result.rb",
|
||||||
|
"lib/prism/parse_result/comments.rb",
|
||||||
|
"lib/prism/parse_result/newlines.rb",
|
||||||
"lib/prism/pattern.rb",
|
"lib/prism/pattern.rb",
|
||||||
"lib/prism/ripper_compat.rb",
|
"lib/prism/ripper_compat.rb",
|
||||||
"lib/prism/serialize.rb",
|
"lib/prism/serialize.rb",
|
||||||
"lib/prism/parse_result/comments.rb",
|
"lib/prism/translation.rb",
|
||||||
"lib/prism/parse_result/newlines.rb",
|
"lib/prism/translation/parser.rb",
|
||||||
|
"lib/prism/translation/parser/compiler.rb",
|
||||||
|
"lib/prism/translation/parser/lexer.rb",
|
||||||
|
"lib/prism/translation/parser/rubocop.rb",
|
||||||
"lib/prism/visitor.rb",
|
"lib/prism/visitor.rb",
|
||||||
"src/diagnostic.c",
|
"src/diagnostic.c",
|
||||||
"src/encoding.c",
|
"src/encoding.c",
|
||||||
|
11
lib/prism/translation.rb
Normal file
11
lib/prism/translation.rb
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module Prism
|
||||||
|
# This module is responsible for converting the prism syntax tree into other
|
||||||
|
# syntax trees. At the moment it only supports converting to the
|
||||||
|
# whitequark/parser gem's syntax tree, but support is planned for the
|
||||||
|
# seattlerb/ruby_parser gem's syntax tree as well.
|
||||||
|
module Translation
|
||||||
|
autoload :Parser, "prism/translation/parser"
|
||||||
|
end
|
||||||
|
end
|
136
lib/prism/translation/parser.rb
Normal file
136
lib/prism/translation/parser.rb
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "parser"
|
||||||
|
|
||||||
|
module Prism
|
||||||
|
module Translation
|
||||||
|
# This class is the entry-point for converting a prism syntax tree into the
|
||||||
|
# whitequark/parser gem's syntax tree. It inherits from the base parser for
|
||||||
|
# the parser gem, and overrides the parse* methods to parse with prism and
|
||||||
|
# then translate.
|
||||||
|
class Parser < ::Parser::Base
|
||||||
|
Racc_debug_parser = false # :nodoc:
|
||||||
|
|
||||||
|
def version # :nodoc:
|
||||||
|
33
|
||||||
|
end
|
||||||
|
|
||||||
|
# The default encoding for Ruby files is UTF-8.
|
||||||
|
def default_encoding
|
||||||
|
Encoding::UTF_8
|
||||||
|
end
|
||||||
|
|
||||||
|
def yyerror # :nodoc:
|
||||||
|
end
|
||||||
|
|
||||||
|
# Parse the given source buffer with prism and translate the resulting
# program into a parser gem AST. The buffer is stored on the parser while
# the translation runs and cleared again afterwards, even on failure.
def parse(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source

  program = Prism.parse(code, filepath: source_buffer.name).value
  build_ast(program, build_offset_cache(code))
ensure
  @source_buffer = nil
end
|
||||||
|
|
||||||
|
# Parse the given source buffer with prism and return a two-element array:
# the translated AST followed by the translated source code comments. The
# buffer is stored on the parser while the translation runs and cleared
# again afterwards, even on failure.
def parse_with_comments(source_buffer)
  @source_buffer = source_buffer
  code = source_buffer.source

  offsets = build_offset_cache(code)
  parsed = Prism.parse(code, filepath: source_buffer.name)

  ast = build_ast(parsed.value, offsets)
  comments = build_comments(parsed.comments, offsets)
  [ast, comments]
ensure
  @source_buffer = nil
end
|
||||||
|
|
||||||
|
# Parse and lex the given source buffer with prism and return a
# three-element array: the translated AST, the translated source code
# comments, and the translated lexer tokens. The second argument is
# accepted but not used.
def tokenize(source_buffer, _recover = false)
  @source_buffer = source_buffer
  code = source_buffer.source

  offsets = build_offset_cache(code)
  lexed = Prism.parse_lex(code, filepath: source_buffer.name)
  program, prism_tokens = lexed.value

  ast = build_ast(program, offsets)
  comments = build_comments(lexed.comments, offsets)
  [ast, comments, build_tokens(prism_tokens, offsets)]
ensure
  @source_buffer = nil
end
|
||||||
|
|
||||||
|
# Prism already resolves numbered parameters for us, so there is nothing to
# declare here. We only report whether the first child of the node is named
# like a numbered parameter (_1 through _9).
def try_declare_numparam(node)
  name = node.children.first
  name.match?(/\A_[1-9]\z/)
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
# Prism deals with offsets in bytes, while the parser gem deals with
# offsets in characters. We need to handle this conversion in order to
# build the parser gem AST.
#
# If the bytesize of the source is the same as the length, then we can
# just use the offset directly. Otherwise, we build a hash that maps byte
# offsets to character offsets.
#
# The hash is filled in for every character boundary (including the end of
# the source) with a single pass over the source, so that looking up an
# offset does not re-slice the source from the beginning each time. Any
# offset that does not fall on a character boundary is still computed, and
# then memoized, on demand by the hash's default block.
#
# Both possible return values respond to #[] with a byte offset and return
# the corresponding character offset.
def build_offset_cache(source)
  if source.bytesize == source.length
    -> (offset) { offset }
  else
    cache = Hash.new do |hash, offset|
      hash[offset] = source.byteslice(0, offset).length
    end

    byte_offset = 0
    source.each_char.with_index do |char, char_offset|
      cache[byte_offset] = char_offset
      byte_offset += char.bytesize
    end

    # The end of the source is a valid offset too (it is the end offset of
    # the last token).
    cache[byte_offset] = source.length
    cache
  end
end
|
||||||
|
|
||||||
|
# Build the parser gem AST from the prism AST.
|
||||||
|
def build_ast(program, offset_cache)
|
||||||
|
program.accept(Compiler.new(self, offset_cache))
|
||||||
|
end
|
||||||
|
|
||||||
|
# Translate each prism comment into a parser gem comment. The byte offsets
# of the prism location are converted through the offset cache to build the
# range within the current source buffer.
def build_comments(comments, offset_cache)
  comments.map do |comment|
    location = comment.location
    first = offset_cache[location.start_offset]
    last = offset_cache[location.end_offset]

    ::Parser::Source::Comment.new(::Parser::Source::Range.new(source_buffer, first, last))
  end
end
|
||||||
|
|
||||||
|
# Build the parser gem tokens from the prism tokens.
|
||||||
|
def build_tokens(tokens, offset_cache)
|
||||||
|
Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a
|
||||||
|
end
|
||||||
|
|
||||||
|
require_relative "parser/compiler"
|
||||||
|
require_relative "parser/lexer"
|
||||||
|
|
||||||
|
private_constant :Compiler
|
||||||
|
private_constant :Lexer
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
1797
lib/prism/translation/parser/compiler.rb
Normal file
1797
lib/prism/translation/parser/compiler.rb
Normal file
File diff suppressed because it is too large
Load Diff
335
lib/prism/translation/parser/lexer.rb
Normal file
335
lib/prism/translation/parser/lexer.rb
Normal file
@ -0,0 +1,335 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module Prism
|
||||||
|
module Translation
|
||||||
|
class Parser
|
||||||
|
# Accepts a list of prism tokens and converts them into the expected
|
||||||
|
# format for the parser gem.
|
||||||
|
class Lexer
|
||||||
|
# The direct translating of types between the two lexers.
|
||||||
|
TYPES = {
|
||||||
|
# These tokens should never appear in the output of the lexer.
|
||||||
|
EOF: nil,
|
||||||
|
MISSING: nil,
|
||||||
|
NOT_PROVIDED: nil,
|
||||||
|
IGNORED_NEWLINE: nil,
|
||||||
|
EMBDOC_END: nil,
|
||||||
|
EMBDOC_LINE: nil,
|
||||||
|
__END__: nil,
|
||||||
|
|
||||||
|
# These tokens have more or less direct mappings.
|
||||||
|
AMPERSAND: :tAMPER2,
|
||||||
|
AMPERSAND_AMPERSAND: :tANDOP,
|
||||||
|
AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
|
||||||
|
AMPERSAND_DOT: :tANDDOT,
|
||||||
|
AMPERSAND_EQUAL: :tOP_ASGN,
|
||||||
|
BACK_REFERENCE: :tBACK_REF,
|
||||||
|
BACKTICK: :tXSTRING_BEG,
|
||||||
|
BANG: :tBANG,
|
||||||
|
BANG_EQUAL: :tNEQ,
|
||||||
|
BANG_TILDE: :tNMATCH,
|
||||||
|
BRACE_LEFT: :tLCURLY,
|
||||||
|
BRACE_RIGHT: :tRCURLY,
|
||||||
|
BRACKET_LEFT: :tLBRACK2,
|
||||||
|
BRACKET_LEFT_ARRAY: :tLBRACK,
|
||||||
|
BRACKET_LEFT_RIGHT: :tAREF,
|
||||||
|
BRACKET_LEFT_RIGHT_EQUAL: :tASET,
|
||||||
|
BRACKET_RIGHT: :tRBRACK,
|
||||||
|
CARET: :tCARET,
|
||||||
|
CARET_EQUAL: :tOP_ASGN,
|
||||||
|
CHARACTER_LITERAL: :tCHARACTER,
|
||||||
|
CLASS_VARIABLE: :tCVAR,
|
||||||
|
COLON: :tCOLON,
|
||||||
|
COLON_COLON: :tCOLON2,
|
||||||
|
COMMA: :tCOMMA,
|
||||||
|
COMMENT: :tCOMMENT,
|
||||||
|
CONSTANT: :tCONSTANT,
|
||||||
|
DOT: :tDOT,
|
||||||
|
DOT_DOT: :tDOT2,
|
||||||
|
DOT_DOT_DOT: :tDOT3,
|
||||||
|
EMBDOC_BEGIN: :tCOMMENT,
|
||||||
|
EMBEXPR_BEGIN: :tSTRING_DBEG,
|
||||||
|
EMBEXPR_END: :tSTRING_DEND,
|
||||||
|
EMBVAR: :tSTRING_DVAR,
|
||||||
|
EQUAL: :tEQL,
|
||||||
|
EQUAL_EQUAL: :tEQ,
|
||||||
|
EQUAL_EQUAL_EQUAL: :tEQQ,
|
||||||
|
EQUAL_GREATER: :tASSOC,
|
||||||
|
EQUAL_TILDE: :tMATCH,
|
||||||
|
FLOAT: :tFLOAT,
|
||||||
|
FLOAT_IMAGINARY: :tIMAGINARY,
|
||||||
|
FLOAT_RATIONAL: :tRATIONAL,
|
||||||
|
FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
|
||||||
|
GLOBAL_VARIABLE: :tGVAR,
|
||||||
|
GREATER: :tGT,
|
||||||
|
GREATER_EQUAL: :tGEQ,
|
||||||
|
GREATER_GREATER: :tRSHFT,
|
||||||
|
GREATER_GREATER_EQUAL: :tOP_ASGN,
|
||||||
|
HEREDOC_START: :tSTRING_BEG,
|
||||||
|
HEREDOC_END: :tSTRING_END,
|
||||||
|
IDENTIFIER: :tIDENTIFIER,
|
||||||
|
INSTANCE_VARIABLE: :tIVAR,
|
||||||
|
INTEGER: :tINTEGER,
|
||||||
|
INTEGER_IMAGINARY: :tIMAGINARY,
|
||||||
|
INTEGER_RATIONAL: :tRATIONAL,
|
||||||
|
INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
|
||||||
|
KEYWORD_ALIAS: :kALIAS,
|
||||||
|
KEYWORD_AND: :kAND,
|
||||||
|
KEYWORD_BEGIN: :kBEGIN,
|
||||||
|
KEYWORD_BEGIN_UPCASE: :klBEGIN,
|
||||||
|
KEYWORD_BREAK: :kBREAK,
|
||||||
|
KEYWORD_CASE: :kCASE,
|
||||||
|
KEYWORD_CLASS: :kCLASS,
|
||||||
|
KEYWORD_DEF: :kDEF,
|
||||||
|
KEYWORD_DEFINED: :kDEFINED,
|
||||||
|
KEYWORD_DO: :kDO,
|
||||||
|
KEYWORD_DO_LOOP: :kDO_COND,
|
||||||
|
KEYWORD_END: :kEND,
|
||||||
|
KEYWORD_END_UPCASE: :klEND,
|
||||||
|
KEYWORD_ENSURE: :kENSURE,
|
||||||
|
KEYWORD_ELSE: :kELSE,
|
||||||
|
KEYWORD_ELSIF: :kELSIF,
|
||||||
|
KEYWORD_FALSE: :kFALSE,
|
||||||
|
KEYWORD_FOR: :kFOR,
|
||||||
|
KEYWORD_IF: :kIF,
|
||||||
|
KEYWORD_IF_MODIFIER: :kIF_MOD,
|
||||||
|
KEYWORD_IN: :kIN,
|
||||||
|
KEYWORD_MODULE: :kMODULE,
|
||||||
|
KEYWORD_NEXT: :kNEXT,
|
||||||
|
KEYWORD_NIL: :kNIL,
|
||||||
|
KEYWORD_NOT: :kNOT,
|
||||||
|
KEYWORD_OR: :kOR,
|
||||||
|
KEYWORD_REDO: :kREDO,
|
||||||
|
KEYWORD_RESCUE: :kRESCUE,
|
||||||
|
KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
|
||||||
|
KEYWORD_RETRY: :kRETRY,
|
||||||
|
KEYWORD_RETURN: :kRETURN,
|
||||||
|
KEYWORD_SELF: :kSELF,
|
||||||
|
KEYWORD_SUPER: :kSUPER,
|
||||||
|
KEYWORD_THEN: :kTHEN,
|
||||||
|
KEYWORD_TRUE: :kTRUE,
|
||||||
|
KEYWORD_UNDEF: :kUNDEF,
|
||||||
|
KEYWORD_UNLESS: :kUNLESS,
|
||||||
|
KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
|
||||||
|
KEYWORD_UNTIL: :kUNTIL,
|
||||||
|
KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
|
||||||
|
KEYWORD_WHEN: :kWHEN,
|
||||||
|
KEYWORD_WHILE: :kWHILE,
|
||||||
|
KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
|
||||||
|
KEYWORD_YIELD: :kYIELD,
|
||||||
|
KEYWORD___ENCODING__: :k__ENCODING__,
|
||||||
|
KEYWORD___FILE__: :k__FILE__,
|
||||||
|
KEYWORD___LINE__: :k__LINE__,
|
||||||
|
LABEL: :tLABEL,
|
||||||
|
LABEL_END: :tLABEL_END,
|
||||||
|
LAMBDA_BEGIN: :tLAMBEG,
|
||||||
|
LESS: :tLT,
|
||||||
|
LESS_EQUAL: :tLEQ,
|
||||||
|
LESS_EQUAL_GREATER: :tCMP,
|
||||||
|
LESS_LESS: :tLSHFT,
|
||||||
|
LESS_LESS_EQUAL: :tOP_ASGN,
|
||||||
|
METHOD_NAME: :tFID,
|
||||||
|
MINUS: :tMINUS,
|
||||||
|
MINUS_EQUAL: :tOP_ASGN,
|
||||||
|
MINUS_GREATER: :tLAMBDA,
|
||||||
|
NEWLINE: :tNL,
|
||||||
|
NUMBERED_REFERENCE: :tNTH_REF,
|
||||||
|
PARENTHESIS_LEFT: :tLPAREN,
|
||||||
|
PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
|
||||||
|
PARENTHESIS_RIGHT: :tRPAREN,
|
||||||
|
PERCENT: :tPERCENT,
|
||||||
|
PERCENT_EQUAL: :tOP_ASGN,
|
||||||
|
PERCENT_LOWER_I: :tQSYMBOLS_BEG,
|
||||||
|
PERCENT_LOWER_W: :tQWORDS_BEG,
|
||||||
|
PERCENT_UPPER_I: :tSYMBOLS_BEG,
|
||||||
|
PERCENT_UPPER_W: :tWORDS_BEG,
|
||||||
|
PERCENT_LOWER_X: :tXSTRING_BEG,
|
||||||
|
PLUS: :tPLUS,
|
||||||
|
PLUS_EQUAL: :tOP_ASGN,
|
||||||
|
PIPE_EQUAL: :tOP_ASGN,
|
||||||
|
PIPE: :tPIPE,
|
||||||
|
PIPE_PIPE: :tOROP,
|
||||||
|
PIPE_PIPE_EQUAL: :tOP_ASGN,
|
||||||
|
QUESTION_MARK: :tEH,
|
||||||
|
REGEXP_BEGIN: :tREGEXP_BEG,
|
||||||
|
REGEXP_END: :tSTRING_END,
|
||||||
|
SEMICOLON: :tSEMI,
|
||||||
|
SLASH: :tDIVIDE,
|
||||||
|
SLASH_EQUAL: :tOP_ASGN,
|
||||||
|
STAR: :tSTAR2,
|
||||||
|
STAR_EQUAL: :tOP_ASGN,
|
||||||
|
STAR_STAR: :tPOW,
|
||||||
|
STAR_STAR_EQUAL: :tOP_ASGN,
|
||||||
|
STRING_BEGIN: :tSTRING_BEG,
|
||||||
|
STRING_CONTENT: :tSTRING_CONTENT,
|
||||||
|
STRING_END: :tSTRING_END,
|
||||||
|
SYMBOL_BEGIN: :tSYMBEG,
|
||||||
|
TILDE: :tTILDE,
|
||||||
|
UAMPERSAND: :tAMPER,
|
||||||
|
UCOLON_COLON: :tCOLON3,
|
||||||
|
UDOT_DOT: :tDOT2,
|
||||||
|
UDOT_DOT_DOT: :tBDOT3,
|
||||||
|
UMINUS: :tUMINUS,
|
||||||
|
UMINUS_NUM: :tUNARY_NUM,
|
||||||
|
UPLUS: :tUPLUS,
|
||||||
|
USTAR: :tSTAR,
|
||||||
|
USTAR_STAR: :tPOW,
|
||||||
|
WORDS_SEP: :tSPACE
|
||||||
|
}
|
||||||
|
|
||||||
|
private_constant :TYPES
|
||||||
|
|
||||||
|
# The Parser::Source::Buffer that the tokens were lexed from.
|
||||||
|
attr_reader :source_buffer
|
||||||
|
|
||||||
|
# An array of prism tokens that we lexed.
|
||||||
|
attr_reader :lexed
|
||||||
|
|
||||||
|
# A hash that maps offsets in bytes to offsets in characters.
|
||||||
|
attr_reader :offset_cache
|
||||||
|
|
||||||
|
# Initialize the lexer with the given source buffer, prism tokens, and
|
||||||
|
# offset cache.
|
||||||
|
def initialize(source_buffer, lexed, offset_cache)
|
||||||
|
@source_buffer = source_buffer
|
||||||
|
@lexed = lexed
|
||||||
|
@offset_cache = offset_cache
|
||||||
|
end
|
||||||
|
|
||||||
|
Range = ::Parser::Source::Range # :nodoc:
|
||||||
|
private_constant :Range
|
||||||
|
|
||||||
|
# Convert the prism tokens into the expected format for the parser gem.
#
# Returns an array of [type, [value, range]] entries, where type is looked
# up through TYPES and then adjusted for the cases where the two lexers
# split or merge tokens differently. IGNORED_NEWLINE and EOF tokens are
# dropped. Raises KeyError if a prism token type has no entry in TYPES.
def to_a
  tokens = []
  index = 0
  length = lexed.length

  # Translates a pair of prism byte offsets into a parser gem range.
  build_range = lambda do |start_offset, end_offset|
    Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
  end

  while index < length
    token, = lexed[index]
    index += 1
    next if token.type == :IGNORED_NEWLINE || token.type == :EOF

    type = TYPES.fetch(token.type)
    value = token.value
    start_offset = token.location.start_offset
    end_offset = token.location.end_offset
    location = build_range.call(start_offset, end_offset)

    case type
    when :tCHARACTER
      # Non-mutating, so that the prism token keeps its own value.
      value = value.delete_prefix("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold every following token up to and including the EMBDOC_END
        # into this one comment. The loop also stops at the end of the
        # token list, so an embdoc without an EMBDOC_END token cannot
        # read past it.
        while (next_token = lexed[index]) && next_token.type != :EMBDOC_END
          value += next_token.value
          index += 1
        end

        if next_token
          value += next_token.value
          index += 1
        end

        # lexed[index - 1] is the last token that was folded in (or the
        # EMBDOC_BEGIN itself if nothing followed it).
        location = build_range.call(start_offset, lexed[index - 1].location.end_offset)
      else
        # Drop the line terminator from the value and shorten the range by
        # one to match. A comment that does not end in a line terminator
        # (the last line of a file without a trailing newline) has nothing
        # to drop, so its range is left as it is.
        chomped = value.chomp
        terminator = chomped.bytesize == value.bytesize ? 0 : 1
        value = chomped
        location = build_range.call(start_offset, end_offset - terminator)
      end
    when :tNL, :tSPACE, :tSTRING_DVAR
      value = nil
    when :tFLOAT
      value = Float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        # Emit the sign as its own token and start the integer's range
        # after it.
        tokens << [:tUNARY_NUM, ["+", build_range.call(start_offset, start_offset + 1)]]
        location = build_range.call(start_offset + 1, end_offset)
      end

      value = Integer(value)
    when :tLABEL, :tLABEL_END
      value = value.chomp(":")
    when :tNTH_REF
      value = Integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value = value.chomp("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSTRING_BEG
      if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
        # An empty plain string: merge the opening and closing quote into
        # a single tSTRING token.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = build_range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
        # A plain string with a single content token: merge all three
        # tokens into a single tSTRING token.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value
        location = build_range.call(next_location.start_offset, next_location.end_offset)
        index += 2
      elsif value.start_with?("<<")
        # A heredoc opener: the value becomes "<<" followed by the quote
        # character, defaulting to a double quote.
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
      end
    when :tSTRING_END
      if token.type == :REGEXP_END
        # Only the first character closes the regexp; the remainder is
        # emitted below as a tREGEXP_OPT token.
        value = value[0]
        location = build_range.call(start_offset, start_offset + 1)
      end
    when :tSYMBEG
      if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
        # Merge the symbol opener and the following token into a single
        # tSYMBOL token.
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = build_range.call(next_location.start_offset, next_location.end_offset)
        index += 1
      end
    when :tFID
      # dig returns nil when no token has been emitted yet, so a method
      # name at the very start of the token list is handled as well.
      type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], build_range.call(start_offset + 1, end_offset)]]
    end
  end

  tokens
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
# Parse a complex from the string representation.
#
# The trailing "i" is removed and what remains becomes the imaginary part:
# a rational if it ends in "r", an integer if it starts with a radix prefix
# (0b, 0o, 0d, or 0x in either case), and otherwise whatever Complex() makes
# of the string. The given string is not modified, so frozen strings are
# accepted and the caller's value is left intact.
def parse_complex(value)
  imaginary = value.chomp("i")

  if imaginary.end_with?("r")
    Complex(0, parse_rational(imaginary))
  elsif imaginary.start_with?(/0[BbOoDdXx]/)
    Complex(0, Integer(imaginary))
  else
    Complex(0, imaginary)
  end
end
|
||||||
|
|
||||||
|
# Parse a rational from the string representation.
#
# The trailing "r" is removed. If what remains starts with a radix prefix
# (0b, 0o, 0d, or 0x in either case) it is converted through Integer()
# first, otherwise the string is handed to Rational() directly. The given
# string is not modified, so frozen strings are accepted and the caller's
# value is left intact.
def parse_rational(value)
  numeric = value.chomp("r")

  if numeric.start_with?(/0[BbOoDdXx]/)
    Rational(Integer(numeric))
  else
    Rational(numeric)
  end
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
37
lib/prism/translation/parser/rubocop.rb
Normal file
37
lib/prism/translation/parser/rubocop.rb
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "parser"
|
||||||
|
require "rubocop"
|
||||||
|
|
||||||
|
require "prism"
|
||||||
|
require "prism/translation/parser"
|
||||||
|
|
||||||
|
module Prism
|
||||||
|
module Translation
|
||||||
|
class Parser
|
||||||
|
# This is the special version number that should be used in rubocop
|
||||||
|
# configuration files to trigger using prism.
|
||||||
|
VERSION_3_3 = 80_82_73_83_77.33
|
||||||
|
|
||||||
|
# This module gets prepended into RuboCop::AST::ProcessedSource.
module ProcessedSource
  # Redefine parser_class so that we can inject the prism parser into the
  # list of known parsers. The prism translation parser is returned when
  # the special prism version number is requested; every other version is
  # passed on to the original implementation.
  #
  # The translation parser is already loaded by the time this runs (this
  # file requires it, and its constant is referenced in the comparison), so
  # no require is needed here.
  def parser_class(ruby_version)
    return super unless ruby_version == Prism::Translation::Parser::VERSION_3_3

    Prism::Translation::Parser
  end
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# :stopdoc:
|
||||||
|
RuboCop::AST::ProcessedSource.prepend(Prism::Translation::Parser::ProcessedSource)
|
||||||
|
known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES)
|
||||||
|
RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES)
|
||||||
|
RuboCop::TargetRuby::KNOWN_RUBIES = [*known_rubies, Prism::Translation::Parser::VERSION_3_3].freeze
|
@ -7,7 +7,7 @@ return unless defined?(RubyVM::InstructionSequence)
|
|||||||
module Prism
|
module Prism
|
||||||
class NewlineTest < TestCase
|
class NewlineTest < TestCase
|
||||||
base = File.expand_path("../", __FILE__)
|
base = File.expand_path("../", __FILE__)
|
||||||
filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb unescape_test.rb]
|
filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb parser_test.rb unescape_test.rb]
|
||||||
|
|
||||||
filepaths.each do |relative|
|
filepaths.each do |relative|
|
||||||
define_method("test_newline_flags_#{relative}") do
|
define_method("test_newline_flags_#{relative}") do
|
||||||
|
188
test/prism/parser_test.rb
Normal file
188
test/prism/parser_test.rb
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require_relative "test_helper"
|
||||||
|
|
||||||
|
begin
|
||||||
|
require "parser/current"
|
||||||
|
rescue LoadError
|
||||||
|
# In CRuby's CI, we're not going to test against the parser gem because we
|
||||||
|
# don't want to have to install it. So in this case we'll just skip this test.
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
# First, opt in to every AST feature.
|
||||||
|
Parser::Builders::Default.modernize
|
||||||
|
|
||||||
|
# Modify the source map == check so that it doesn't check against the node
|
||||||
|
# itself so we don't get into a recursive loop.
|
||||||
|
Parser::Source::Map.prepend(
|
||||||
|
Module.new {
|
||||||
|
def ==(other)
|
||||||
|
self.class == other.class &&
|
||||||
|
(instance_variables - %i[@node]).map do |ivar|
|
||||||
|
instance_variable_get(ivar) == other.instance_variable_get(ivar)
|
||||||
|
end.reduce(:&)
|
||||||
|
end
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Next, ensure that we're comparing the nodes and also comparing the source
|
||||||
|
# ranges so that we're getting all of the necessary information.
|
||||||
|
Parser::AST::Node.prepend(
|
||||||
|
Module.new {
|
||||||
|
def ==(other)
|
||||||
|
super && (location == other.location)
|
||||||
|
end
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
module Prism
|
||||||
|
class ParserTest < TestCase
|
||||||
|
base = File.join(__dir__, "fixtures")
|
||||||
|
|
||||||
|
# These files are either failing to parse or failing to translate, so we'll
|
||||||
|
# skip them for now.
|
||||||
|
skip_all = %w[
|
||||||
|
arrays.txt
|
||||||
|
constants.txt
|
||||||
|
dash_heredocs.txt
|
||||||
|
dos_endings.txt
|
||||||
|
embdoc_no_newline_at_end.txt
|
||||||
|
heredocs_with_ignored_newlines.txt
|
||||||
|
regex.txt
|
||||||
|
spanning_heredoc.txt
|
||||||
|
spanning_heredoc_newlines.txt
|
||||||
|
tilde_heredocs.txt
|
||||||
|
unescaping.txt
|
||||||
|
]
|
||||||
|
|
||||||
|
# Not sure why these files are failing on JRuby, but skipping them for now.
|
||||||
|
if RUBY_ENGINE == "jruby"
|
||||||
|
skip_all.push("emoji_method_calls.txt", "symbols.txt")
|
||||||
|
end
|
||||||
|
|
||||||
|
# These files are failing to translate their lexer output into the lexer
|
||||||
|
# output expected by the parser gem, so we'll skip them for now.
|
||||||
|
skip_tokens = %w[
|
||||||
|
comments.txt
|
||||||
|
endless_range_in_conditional.txt
|
||||||
|
heredoc_with_comment.txt
|
||||||
|
heredoc_with_escaped_newline_at_start.txt
|
||||||
|
heredocs_leading_whitespace.txt
|
||||||
|
heredocs_nested.txt
|
||||||
|
heredocs_with_ignored_newlines_and_non_empty.txt
|
||||||
|
indented_file_end.txt
|
||||||
|
non_alphanumeric_methods.txt
|
||||||
|
range_begin_open_inclusive.txt
|
||||||
|
single_quote_heredocs.txt
|
||||||
|
strings.txt
|
||||||
|
xstring.txt
|
||||||
|
]
|
||||||
|
|
||||||
|
Dir["*.txt", base: base].each do |name|
|
||||||
|
next if skip_all.include?(name)
|
||||||
|
|
||||||
|
define_method("test_#{name}") do
|
||||||
|
assert_equal_parses(File.join(base, name), compare_tokens: !skip_tokens.include?(name))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def assert_equal_parses(filepath, compare_tokens: true)
|
||||||
|
buffer = Parser::Source::Buffer.new(filepath, 1)
|
||||||
|
buffer.source = File.read(filepath)
|
||||||
|
|
||||||
|
parser = Parser::CurrentRuby.default_parser
|
||||||
|
parser.diagnostics.consumer = ->(*) {}
|
||||||
|
parser.diagnostics.all_errors_are_fatal = true
|
||||||
|
|
||||||
|
expected_ast, expected_comments, expected_tokens =
|
||||||
|
begin
|
||||||
|
parser.tokenize(buffer)
|
||||||
|
rescue ArgumentError, Parser::SyntaxError
|
||||||
|
return
|
||||||
|
end
|
||||||
|
|
||||||
|
actual_ast, actual_comments, actual_tokens =
|
||||||
|
Prism::Translation::Parser.new.tokenize(buffer)
|
||||||
|
|
||||||
|
assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
|
||||||
|
assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens
|
||||||
|
assert_equal_comments(expected_comments, actual_comments)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Build the failure message for two ASTs that did not compare equal.
#
# Both trees are walked breadth-first in lockstep and the first difference
# that is found is described: a missing or non-node counterpart, differing
# node types, differing locations, or differing string contents. If the walk
# finds nothing, both trees are printed in full.
def assert_equal_asts_message(expected_ast, actual_ast)
  queue = [[expected_ast, actual_ast]]

  while (left, right = queue.shift)
    # One side may be nil or a plain value where the other has a node
    # (for example when a child is missing). Report that instead of
    # raising while building the message.
    unless left.is_a?(Parser::AST::Node) && right.is_a?(Parser::AST::Node)
      return "expected: #{left.inspect}\nactual: #{right.inspect}"
    end

    if left.type != right.type
      return "expected: #{left.type}\nactual: #{right.type}"
    end

    if left.location != right.location
      return "expected:\n#{left.inspect}\n#{left.location}\nactual:\n#{right.inspect}\n#{right.location}"
    end

    if left.type == :str && left.children[0] != right.children[0]
      return "expected: #{left.inspect}\nactual: #{right.inspect}"
    end

    left.children.zip(right.children).each do |left_child, right_child|
      queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
    end
  end

  "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
end
|
||||||
|
|
||||||
|
# Assert that the translated tokens match the tokens of the parser gem.
#
# If the two lists are not equal outright, they are walked in lockstep and
# compared token by token, after normalizing the differences that prism
# cannot express. Running out of translated tokens early is reported as a
# regular assertion failure (with nil as the actual token).
def assert_equal_tokens(expected_tokens, actual_tokens)
  return if expected_tokens == actual_tokens

  expected_index = 0
  actual_index = 0

  while expected_index < expected_tokens.length
    expected_token = expected_tokens[expected_index]
    actual_token = actual_tokens[actual_index]

    expected_index += 1
    actual_index += 1

    # nil when there are no translated tokens left to compare against.
    actual_type = actual_token && actual_token[0]

    # The parser gem always has a space before a string end in list
    # literals, but we don't. So we'll skip over the space.
    if expected_token[0] == :tSPACE && actual_type == :tSTRING_END
      expected_index += 1
      next
    end

    # There are a lot of tokens that have very specific meaning according
    # to the context of the parser. We don't expose that information in
    # prism, so we need to normalize these tokens a bit.
    case actual_type
    when :kDO
      actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0])
    when :tLPAREN
      actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2
    when :tLCURLY
      actual_token[0] = expected_token[0] if %i[tLBRACE tLBRACE_ARG].include?(expected_token[0])
    when :tPOW
      actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR
    end

    # Now we can assert that the tokens are actually equal.
    assert_equal expected_token, actual_token, -> {
      "expected: #{expected_token.inspect}\n" \
        "actual: #{actual_token.inspect}"
    }
  end
end
|
||||||
|
|
||||||
|
def assert_equal_comments(expected_comments, actual_comments)
|
||||||
|
assert_equal expected_comments, actual_comments, -> {
|
||||||
|
"expected: #{expected_comments.inspect}\n" \
|
||||||
|
"actual: #{actual_comments.inspect}"
|
||||||
|
}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
Loading…
x
Reference in New Issue
Block a user