[Feature #19741] Sync all files in yarp

This commit is the initial sync of all files from ruby/yarp
into ruby/ruby. Notably, it does the following:

* Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp
* Sync all ruby/yarp/src/ files to ruby/ruby/yarp/
* Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp
This commit is contained in:
Jemma Issroff 2023-06-20 11:53:02 -04:00 committed by Takashi Kokubun
parent 08478fefca
commit cc7f765f2c
Notes: git 2023-06-21 18:26:01 +00:00
1898 changed files with 105766 additions and 0 deletions

248
lib/yarp.rb Normal file
View File

@ -0,0 +1,248 @@
# frozen_string_literal: true
module YARP
# A span of the source that a node or token covers, stored as the offset at
# which it starts plus its length.
class Location
  attr_reader :start_offset, :length

  def initialize(start_offset, length)
    @start_offset, @length = start_offset, length
  end

  # The offset just past the last position covered by this location.
  def end_offset
    start_offset + length
  end

  # Supports hash-style pattern matching. The requested +keys+ are ignored
  # and both offsets are always returned.
  def deconstruct_keys(keys)
    { start_offset:, end_offset: }
  end

  # Rendered as "(start...end)".
  def pretty_print(q)
    q.text("(#{start_offset}...#{end_offset})")
  end

  # Two locations are equal when both offsets match. Anything that is not a
  # Location compares as not equal.
  def ==(other)
    case other
    in Location[start_offset: ^(start_offset), end_offset: ^(end_offset)]
      true
    else
      false
    end
  end

  # An empty location at the very start of the source.
  def self.null
    new(0, 0)
  end
end
# A comment found while parsing, described by its type and the location it
# occupies.
class Comment
  attr_reader :type, :location

  def initialize(type, location)
    @type, @location = type, location
  end

  # Supports hash-style pattern matching; +keys+ is ignored and every field
  # is returned.
  def deconstruct_keys(keys)
    { type:, location: }
  end
end
# An error reported by the parser: a message plus the location it refers to.
class ParseError
  attr_reader :message, :location

  def initialize(message, location)
    @message, @location = message, location
  end

  # Supports hash-style pattern matching; +keys+ is ignored and every field
  # is returned.
  def deconstruct_keys(keys)
    { message:, location: }
  end
end
# A warning reported by the parser: a message plus the location it refers to.
class ParseWarning
  attr_reader :message, :location

  def initialize(message, location)
    @message, @location = message, location
  end

  # Supports hash-style pattern matching; +keys+ is ignored and every field
  # is returned.
  def deconstruct_keys(keys)
    { message:, location: }
  end
end
# The outcome of a parse (or lex) call. It bundles the produced value with
# the comments, errors, and warnings that were encountered along the way.
class ParseResult
  attr_reader :value, :comments, :errors, :warnings

  def initialize(value, comments, errors, warnings)
    @value, @comments = value, comments
    @errors, @warnings = errors, warnings
  end

  # Supports hash-style pattern matching; +keys+ is ignored and every field
  # is returned.
  def deconstruct_keys(keys)
    { value:, comments:, errors:, warnings: }
  end

  # True when no errors were recorded. Warnings do not affect the outcome.
  def success?
    errors.empty?
  end

  # True when at least one error was recorded.
  def failure?
    !errors.empty?
  end
end
# A single token produced from the Ruby source: its type, its text, and the
# span of the source it covers.
class Token
  attr_reader :type, :value, :start_offset, :length

  def initialize(type, value, start_offset, length)
    @type, @value = type, value
    @start_offset, @length = start_offset, length
  end

  # The offset just past the last position covered by this token.
  def end_offset
    start_offset + length
  end

  # A freshly built Location covering this token.
  def location
    Location.new(start_offset, length)
  end

  # Supports hash-style pattern matching; +keys+ is ignored and every field
  # is returned.
  def deconstruct_keys(keys)
    { type:, value:, location: }
  end

  # Rendered as TYPE(start...end)(value).
  def pretty_print(q)
    q.group do
      q.text(type.to_s)
      location.pretty_print(q)
      q.text("(")
      q.nest(2) do
        q.breakable("")
        q.pp(value)
      end
      q.breakable("")
      q.text(")")
    end
  end

  # Tokens are equal when their type and value match. The position is
  # deliberately not part of the comparison.
  def ==(other)
    case other
    in Token[type: ^(type), value: ^(value)]
      true
    else
      false
    end
  end
end
# Base class for every node in the tree. It supplies the position helpers and
# a generic pretty printer. The printer relies on deconstruct_keys, which is
# not defined here and must come from the concrete node class.
class Node
  attr_reader :start_offset, :length

  # The offset just past the last position covered by this node.
  def end_offset
    start_offset + length
  end

  # A freshly built Location covering this node.
  def location
    Location.new(start_offset, length)
  end

  # Rendered as ClassName(start...end)(field, field, ...). The class name is
  # printed without its namespace and the :location field is left out because
  # the location is already shown after the name.
  def pretty_print(q)
    q.group do
      q.text(self.class.name.split("::").last)
      location.pretty_print(q)
      q.text("(")
      q.nest(2) do
        fields = deconstruct_keys([]).tap { |hash| hash.delete(:location) }
        q.breakable("")
        q.seplist(fields, -> { q.comma_breakable }, :each_value) { |field| q.pp(field) }
      end
      q.breakable("")
      q.text(")")
    end
  end
end
# The minimal tree walker. It only knows how to dispatch to a node's #accept
# and how to fan out over a list of nodes or a node's children. No individual
# visit methods are defined here, so consumers must implement each one they
# need. See the Visitor class for a default implementation that keeps walking
# the tree.
class BasicVisitor
  # Dispatches to node.accept(self). A nil node is skipped and yields nil.
  def visit(node)
    node.accept(self) unless node.nil?
  end

  # Visits every node in +nodes+ and returns the collected results.
  def visit_all(nodes)
    nodes.map { |child| visit(child) }
  end

  # Visits every entry of node.child_nodes and returns the collected results.
  def visit_child_nodes(node)
    visit_all(node.child_nodes)
  end
end
# Lexes +source+ with Ripper and returns its tokens with two adjustments:
#
# * :on_sp (whitespace) tokens are dropped
# * a token is folded into the previously kept token, by appending its text,
#   when both are :on_words_sep, or when both are :on_tstring_content and the
#   new one starts with "#$" or "#@"
#
# [raises SyntaxError] if the syntax in source is invalid
def self.lex_ripper(source)
  last_kept = []

  Ripper.lex(source, raise_errors: true).each_with_object([]) do |token, kept|
    event = token[1]
    next if event == :on_sp

    foldable =
      case event
      when :on_tstring_content
        last_kept[1] == :on_tstring_content && token[2].start_with?("\#$", "\#@")
      when :on_words_sep
        last_kept[1] == :on_words_sep
      else
        false
      end

    if foldable
      # Grow the token that is already in the result instead of adding one.
      last_kept[2] << token[2]
    else
      kept << token
      last_kept = token
    end
  end
end
# Rebuilds a tree from its +serialized+ form, using +source+ as the reference
# text. All of the work is delegated to Serialize.load.
def self.load(source, serialized) = Serialize.load(source, serialized)
# Parses +source+ by handing it, together with the optional +filepath+
# (nil when omitted), to the private _parse implementation.
def self.parse(source, filepath=nil) = _parse(source, filepath)
end
require_relative "yarp/lex_compat"
require_relative "yarp/node"
require_relative "yarp/ripper_compat"
require_relative "yarp/serialize"
require_relative "yarp/pack"
require "yarp.so"
# Hide YARP._parse so that YARP.parse is the only public way to reach it.
# NOTE(review): _parse is not defined in any Ruby file shown here; presumably
# it comes from the "yarp.so" extension required just above, which is why
# this runs after that require.
module YARP
  private_class_method :_parse
end

166
lib/yarp/language_server.rb Normal file
View File

@ -0,0 +1,166 @@
# frozen_string_literal: true
require "cgi"
require "json"
require "uri"
module YARP
# YARP additionally ships with a language server conforming to the
# language server protocol. It can be invoked by running the yarp-lsp
# bin script (bin/yarp-lsp)
class LanguageServer
# Body text for a pre-filled GitHub issue. code_actions substitutes the
# diagnostic message for the %{error} placeholder and URL-encodes the result
# into the issue link it offers.
GITHUB_TEMPLATE = <<~TEMPLATE
Reporting issue with error `%{error}`.
## Expected behavior
<!-- TODO: Briefly explain what the expected behavior should be on this example. -->
## Actual behavior
<!-- TODO: Describe here what actually happened. -->
## Steps to reproduce the problem
<!-- TODO: Describe how we can reproduce the problem. -->
## Additional information
<!-- TODO: Include any additional information, such as screenshots. -->
TEMPLATE
attr_reader :input, :output
# Sets up the streams the server talks over. Both default to the process's
# standard streams and are switched to binary mode before being stored.
def initialize(input: $stdin, output: $stdout)
  @input, @output = input.binmode, output.binmode
end
# rubocop:disable Layout/LineLength

# Runs the server loop. Framed JSON-RPC messages are read from +input+ until
# the stream ends or an "exit" notification arrives; each one is dispatched on
# its method name and any reply is sent through #write.
#
# A message that matches none of the handlers does not abort the loop. If it
# is a request (it has a method name and an id) it is answered with the
# JSON-RPC "method not found" error; anything else is dropped silently.
def run
  # Document text keyed by URI. A document the client has not sent is read
  # from disk on first access, and cached, when the file exists; otherwise
  # the lookup yields nil.
  store =
    Hash.new do |hash, uri|
      filepath = CGI.unescape(URI.parse(uri).path)
      File.exist?(filepath) ? (hash[uri] = File.read(filepath)) : nil
    end

  # Every message is a header block terminated by a blank line, followed by
  # Content-Length bytes of JSON.
  while (headers = input.gets("\r\n\r\n"))
    source = input.read(headers[/Content-Length: (\d+)/i, 1].to_i)
    request = JSON.parse(source, symbolize_names: true)

    # stree-ignore
    case request
    in { method: "initialize", id: }
      store.clear
      write(id: id, result: { capabilities: capabilities })
    in { method: "initialized" }
      # ignored
    in { method: "shutdown" } # tolerate missing ID to be a good citizen
      store.clear
      write(id: request[:id], result: {})
    in { method: "exit"}
      return
    in { method: "textDocument/didChange", params: { textDocument: { uri: }, contentChanges: [{ text: }, *] } }
      # Only the first change is used and it replaces the whole document.
      store[uri] = text
    in { method: "textDocument/didOpen", params: { textDocument: { uri:, text: } } }
      store[uri] = text
    in { method: "textDocument/didClose", params: { textDocument: { uri: } } }
      store.delete(uri)
    in { method: "textDocument/diagnostic", id:, params: { textDocument: { uri: } } }
      contents = store[uri]
      write(id: id, result: contents ? diagnostics(contents) : nil)
    in { method: "textDocument/codeAction", id:, params: { textDocument: { uri: }, context: { diagnostics: }}}
      contents = store[uri]
      write(id: id, result: contents ? code_actions(contents, diagnostics) : nil)
    in { method: %r{\$/.+} }
      # ignored
    in { method: String => unsupported, id: }
      # A request we do not implement still expects a reply. -32601 is the
      # JSON-RPC "method not found" error code.
      write(id: id, error: { code: -32601, message: "Method not found: #{unsupported}" })
    else
      # An unknown notification or a malformed message: there is nothing to
      # reply to, so keep serving.
    end
  end
end
# rubocop:enable Layout/LineLength
private
# The capabilities advertised in the reply to "initialize": quickfix code
# actions, per-document diagnostics, and open/close plus change notifications
# for text documents.
def capabilities
  code_action_options = { codeActionKinds: ['quickfix'] }
  diagnostic_options = { interFileDependencies: false, workspaceDiagnostics: false }
  # NOTE(review): change: 1 is presumably the "full document" sync kind, which
  # would match #run replacing the whole text on didChange. Confirm against
  # the protocol.
  sync_options = { change: 1, openClose: true }

  {
    codeActionProvider: code_action_options,
    diagnosticProvider: diagnostic_options,
    textDocumentSync: sync_options
  }
end
# Builds one "quickfix" code action per diagnostic. Each action opens a
# pre-filled GitHub issue reporting the diagnostic's message as an incorrect
# error. +source+ is accepted but not used.
def code_actions(source, diagnostics)
  diagnostics.map do |diagnostic|
    reported = diagnostic[:message]
    encoded_body = URI.encode_www_form_component(GITHUB_TEMPLATE % { error: reported })

    open_issue = {
      title: "Report incorrect error",
      command: "vscode.open",
      arguments: ["https://github.com/ruby/yarp/issues/new?&labels=Bug&body=#{encoded_body}"]
    }

    {
      title: "Report incorrect error: `#{reported}`",
      kind: "quickfix",
      diagnostics: [diagnostic],
      command: open_issue
    }
  end
end
# Parses +source+ and returns a "full" diagnostic report whose items are the
# parse errors (severity 1) followed by the parse warnings (severity 2).
def diagnostics(source)
  # Memoized conversion from a byte offset to a zero-based line/character
  # position. Only the text before the offset is inspected: the line is the
  # number of newlines in it and the character is the number of characters
  # after its last newline. Searching the prefix, rather than the whole
  # source from a clamped index, keeps offset 0 at character 0 even when the
  # source starts with a newline.
  offsets = Hash.new do |hash, key|
    prefix = source.byteslice(...key)
    last_newline = prefix.rindex("\n") || -1
    hash[key] = { line: prefix.count("\n"), character: prefix.length - last_newline - 1 }
  end

  # Shapes one parser diagnostic into a report item.
  to_item = lambda do |diagnostic, severity|
    {
      range: {
        start: offsets[diagnostic.location.start_offset],
        end: offsets[diagnostic.location.end_offset],
      },
      message: diagnostic.message,
      severity: severity,
    }
  end

  parse_output = YARP.parse(source)

  {
    kind: "full",
    items: [
      *parse_output.errors.map { |error| to_item.call(error, 1) },
      *parse_output.warnings.map { |warning| to_item.call(warning, 2) },
    ]
  }
end
# Sends one message to the client. +value+ is merged with the "jsonrpc"
# version marker, serialized to JSON, prefixed with a Content-Length header
# giving the payload size in bytes, and flushed immediately.
def write(value)
  payload = value.merge(jsonrpc: "2.0").to_json
  frame = "Content-Length: #{payload.bytesize}\r\n\r\n#{payload}"
  output.print(frame)
  output.flush
end
end
end

749
lib/yarp/lex_compat.rb Normal file
View File

@ -0,0 +1,749 @@
# frozen_string_literal: true
require "delegate"
module YARP
# This class is responsible for lexing the source using YARP and then
# converting those tokens to be compatible with Ripper. In the vast majority
# of cases, this is a one-to-one mapping of the token type. Everything else
# generally lines up. However, there are a few cases that require special
# handling.
class LexCompat
# This is a mapping of YARP token types to Ripper token types. This is a
# many-to-one mapping because we split up our token types, whereas Ripper
# tends to group them.
#
# LexCompat#result looks types up with RIPPER.fetch, so a YARP token type
# that is missing from this table raises KeyError rather than being skipped.
# INVALID is the one entry that does not map to an on_* event name.
RIPPER = {
AMPERSAND: :on_op,
AMPERSAND_AMPERSAND: :on_op,
AMPERSAND_AMPERSAND_EQUAL: :on_op,
AMPERSAND_DOT: :on_op,
AMPERSAND_EQUAL: :on_op,
BACK_REFERENCE: :on_backref,
BACKTICK: :on_backtick,
BANG: :on_op,
BANG_EQUAL: :on_op,
BANG_TILDE: :on_op,
BRACE_LEFT: :on_lbrace,
BRACE_RIGHT: :on_rbrace,
BRACKET_LEFT: :on_lbracket,
BRACKET_LEFT_ARRAY: :on_lbracket,
BRACKET_LEFT_RIGHT: :on_op,
BRACKET_LEFT_RIGHT_EQUAL: :on_op,
BRACKET_RIGHT: :on_rbracket,
CARET: :on_op,
CARET_EQUAL: :on_op,
CHARACTER_LITERAL: :on_CHAR,
CLASS_VARIABLE: :on_cvar,
COLON: :on_op,
COLON_COLON: :on_op,
COMMA: :on_comma,
COMMENT: :on_comment,
CONSTANT: :on_const,
DOT: :on_period,
DOT_DOT: :on_op,
DOT_DOT_DOT: :on_op,
EMBDOC_BEGIN: :on_embdoc_beg,
EMBDOC_END: :on_embdoc_end,
EMBDOC_LINE: :on_embdoc,
EMBEXPR_BEGIN: :on_embexpr_beg,
EMBEXPR_END: :on_embexpr_end,
EMBVAR: :on_embvar,
EOF: :on_eof,
EQUAL: :on_op,
EQUAL_EQUAL: :on_op,
EQUAL_EQUAL_EQUAL: :on_op,
EQUAL_GREATER: :on_op,
EQUAL_TILDE: :on_op,
FLOAT: :on_float,
GREATER: :on_op,
GREATER_EQUAL: :on_op,
GREATER_GREATER: :on_op,
GREATER_GREATER_EQUAL: :on_op,
GLOBAL_VARIABLE: :on_gvar,
HEREDOC_END: :on_heredoc_end,
HEREDOC_START: :on_heredoc_beg,
IDENTIFIER: :on_ident,
IGNORED_NEWLINE: :on_ignored_nl,
IMAGINARY_NUMBER: :on_imaginary,
INTEGER: :on_int,
INSTANCE_VARIABLE: :on_ivar,
INVALID: :INVALID,
KEYWORD___ENCODING__: :on_kw,
KEYWORD___LINE__: :on_kw,
KEYWORD___FILE__: :on_kw,
KEYWORD_ALIAS: :on_kw,
KEYWORD_AND: :on_kw,
KEYWORD_BEGIN: :on_kw,
KEYWORD_BEGIN_UPCASE: :on_kw,
KEYWORD_BREAK: :on_kw,
KEYWORD_CASE: :on_kw,
KEYWORD_CLASS: :on_kw,
KEYWORD_DEF: :on_kw,
KEYWORD_DEFINED: :on_kw,
KEYWORD_DO: :on_kw,
KEYWORD_DO_LOOP: :on_kw,
KEYWORD_ELSE: :on_kw,
KEYWORD_ELSIF: :on_kw,
KEYWORD_END: :on_kw,
KEYWORD_END_UPCASE: :on_kw,
KEYWORD_ENSURE: :on_kw,
KEYWORD_FALSE: :on_kw,
KEYWORD_FOR: :on_kw,
KEYWORD_IF: :on_kw,
KEYWORD_IF_MODIFIER: :on_kw,
KEYWORD_IN: :on_kw,
KEYWORD_MODULE: :on_kw,
KEYWORD_NEXT: :on_kw,
KEYWORD_NIL: :on_kw,
KEYWORD_NOT: :on_kw,
KEYWORD_OR: :on_kw,
KEYWORD_REDO: :on_kw,
KEYWORD_RESCUE: :on_kw,
KEYWORD_RESCUE_MODIFIER: :on_kw,
KEYWORD_RETRY: :on_kw,
KEYWORD_RETURN: :on_kw,
KEYWORD_SELF: :on_kw,
KEYWORD_SUPER: :on_kw,
KEYWORD_THEN: :on_kw,
KEYWORD_TRUE: :on_kw,
KEYWORD_UNDEF: :on_kw,
KEYWORD_UNLESS: :on_kw,
KEYWORD_UNLESS_MODIFIER: :on_kw,
KEYWORD_UNTIL: :on_kw,
KEYWORD_UNTIL_MODIFIER: :on_kw,
KEYWORD_WHEN: :on_kw,
KEYWORD_WHILE: :on_kw,
KEYWORD_WHILE_MODIFIER: :on_kw,
KEYWORD_YIELD: :on_kw,
LABEL: :on_label,
LABEL_END: :on_label_end,
LAMBDA_BEGIN: :on_tlambeg,
LESS: :on_op,
LESS_EQUAL: :on_op,
LESS_EQUAL_GREATER: :on_op,
LESS_LESS: :on_op,
LESS_LESS_EQUAL: :on_op,
MINUS: :on_op,
MINUS_EQUAL: :on_op,
MINUS_GREATER: :on_tlambda,
NEWLINE: :on_nl,
NUMBERED_REFERENCE: :on_backref,
PARENTHESIS_LEFT: :on_lparen,
PARENTHESIS_LEFT_PARENTHESES: :on_lparen,
PARENTHESIS_RIGHT: :on_rparen,
PERCENT: :on_op,
PERCENT_EQUAL: :on_op,
PERCENT_LOWER_I: :on_qsymbols_beg,
PERCENT_LOWER_W: :on_qwords_beg,
PERCENT_LOWER_X: :on_backtick,
PERCENT_UPPER_I: :on_symbols_beg,
PERCENT_UPPER_W: :on_words_beg,
PIPE: :on_op,
PIPE_EQUAL: :on_op,
PIPE_PIPE: :on_op,
PIPE_PIPE_EQUAL: :on_op,
PLUS: :on_op,
PLUS_EQUAL: :on_op,
QUESTION_MARK: :on_op,
RATIONAL_NUMBER: :on_rational,
REGEXP_BEGIN: :on_regexp_beg,
REGEXP_END: :on_regexp_end,
SEMICOLON: :on_semicolon,
SLASH: :on_op,
SLASH_EQUAL: :on_op,
STAR: :on_op,
STAR_EQUAL: :on_op,
STAR_STAR: :on_op,
STAR_STAR_EQUAL: :on_op,
STRING_BEGIN: :on_tstring_beg,
STRING_CONTENT: :on_tstring_content,
STRING_END: :on_tstring_end,
SYMBOL_BEGIN: :on_symbeg,
TILDE: :on_op,
UCOLON_COLON: :on_op,
UDOT_DOT: :on_op,
UDOT_DOT_DOT: :on_op,
UMINUS: :on_op,
UMINUS_NUM: :on_op,
UPLUS: :on_op,
USTAR: :on_op,
USTAR_STAR: :on_op,
WORDS_SEP: :on_words_sep,
__END__: :on___end__
}.freeze
# Tokens are the same four-element arrays that Ripper produces:
# [[lineno, column], event, value, state]. This wrapper adds named readers
# for the four slots and delegates every other method to the array.
class Token < SimpleDelegator
  # The [lineno, column] pair.
  def location = self[0]

  # The Ripper event name, such as :on_ident.
  def event = self[1]

  # The token's text.
  def value = self[2]

  # The lexer state attached to the token.
  def state = self[3]
end
# Ripper's event for this token only carries the first line, while ours
# carries everything that follows it. For comparison the value is trimmed to
# its first line, newline included. A value with no newline is compared
# whole.
class EndContentToken < Token
  def ==(other)
    text = value
    first_line = text[0..text.index("\n")]
    [location, event, first_line, state] == other
  end
end
# It is far from obvious which state the parser is in when a comment is
# dispatched, so we don't bother comparing the state: everything except the
# last element has to match.
class CommentToken < Token
  def ==(other)
    mine = self[0...-1]
    theirs = other[0...-1]
    mine == theirs
  end
end
# Heredoc end tokens are emitted in an odd order, so their state is left out
# of the comparison: everything except the last element has to match.
class HeredocEndToken < Token
  def ==(other)
    mine = self[0...-1]
    theirs = other[0...-1]
    mine == theirs
  end
end
# Identifier tokens almost always match Ripper exactly. The exception is
# that we sometimes know an identifier is a local when Ripper does not (for
# example when it is introduced through a named capture in a regular
# expression). Our own state is therefore never compared. Besides matching
# on everything but the state, the other token's state must either be
# exactly LABEL|END or have at least one of the ARG_ANY bits set.
class IdentToken < Token
  def ==(other)
    return false unless self[0...-1] == other[0...-1]

    other_state = other[3]
    return true if other_state == Ripper::EXPR_LABEL | Ripper::EXPR_END

    other_state & Ripper::EXPR_ARG_ANY != 0
  end
end
# Ignored newlines can occasionally have a LABEL state attached to them, so
# the state is compared more loosely here:
#
# * everything except the state must match exactly
# * when our state is exactly ARG|LABELED, the other token only needs to have
#   at least one of those two bits set
# * otherwise both states must be equal
#
# The state is the fourth element of a token (index 3). The comparison used
# to read index 4, which does not exist, so both sides were nil and the state
# was never actually checked. The mask test is also parenthesized now:
# without the parentheses `a & b | c > 0` groups as `((a & b) | c) > 0`,
# which is true for every state.
class IgnoredNewlineToken < Token
  def ==(other)
    return false unless self[0...-1] == other[0...-1]

    arg_or_labeled = Ripper::EXPR_ARG | Ripper::EXPR_LABELED
    other_state = other[3]

    if state == arg_or_labeled
      # Compared with != rather than > so that a Ripper::Lexer::State result
      # works as well as a plain Integer.
      (other_state & arg_or_labeled) != 0
    else
      state == other_state
    end
  end
end
# A heredoc in this case is a list of tokens that belong to the body of the
# heredoc that should be appended onto the list of tokens when the heredoc
# closes.
module Heredoc
# A heredoc opened with neither a dash nor a tilde. Its body is simply a list
# of tokens, kept aside so that they can be put back into the token stream
# in the right order once the heredoc closes.
class PlainHeredoc
  attr_reader :tokens

  def initialize
    @tokens = []
  end

  # Records one more body token and returns the token list.
  def <<(token)
    tokens.push(token)
  end

  # The collected tokens, unchanged.
  def to_a
    tokens
  end
end
# A heredoc opened with a dash. Its string content may have to be split after
# every backslash-newline to mimic the way Ripper reports it. Whether to
# split at all is decided by the caller and passed in as +split+.
class DashHeredoc
  attr_reader :split, :tokens

  def initialize(split)
    @split = split
    @tokens = []
  end

  # Records one more body token and returns the token list.
  def <<(token)
    tokens.push(token)
  end

  # Returns the body tokens. When splitting is enabled, every string content
  # token that sits outside of an embedded expression is replaced by one
  # token per piece. Pieces after the first are reported at column 0, and the
  # line number advances by the number of newlines in the pieces before them.
  def to_a
    depth = 0

    tokens.each_with_object([]) do |token, results|
      case token.event
      when :on_embexpr_beg
        depth += 1
      when :on_embexpr_end
        depth -= 1
      when :on_tstring_content
        if split && depth == 0
          lineno, column = token[0]

          # The lookbehinds keep the backslash-newline delimiter attached to
          # the piece that precedes it.
          pieces = token.value.split(/(?<=[^\\]\\\n)|(?<=[^\\]\\\r\n)/)
          pieces.each_with_index do |piece, index|
            column = 0 if index > 0
            results << Token.new([[lineno, column], :on_tstring_content, piece, token.state])
            lineno += piece.count("\n")
          end

          next
        end
      end

      results << token
    end
  end
end
# Heredocs that are dedenting heredocs are a little more complicated.
# Ripper outputs on_ignored_sp tokens for the whitespace that is being
# removed from the output. YARP only modifies the node itself and keeps
# the token the same. This simplifies YARP, but makes comparing against
# Ripper much harder because there is a length mismatch.
#
# Fortunately, we already have to pull out the heredoc tokens in order to
# insert them into the stream in the correct order. As such, we can do
# some extra manipulation on the tokens to make them match Ripper's
# output by mirroring the dedent logic that Ripper uses.
class DedentingHeredoc
TAB_WIDTH = 8
attr_reader :tokens, :dedent_next, :dedent, :embexpr_balance
# Starts with no tokens and no known dedent. The first string content token
# is eligible for the dedent calculation, and we are not inside any embedded
# expression.
def initialize
  @tokens, @dedent = [], nil
  @dedent_next, @embexpr_balance = true, 0
end
# Records +token+ and, while doing so, keeps track of the smallest amount of
# leading whitespace seen on plain string content. That amount is what gets
# stripped from the start of each line later on.
#
# Only string content outside of embedded expressions and nested heredocs is
# measured. Lines made up solely of whitespace and ending in a newline are
# skipped. The first line of a token is measured only when the previous token
# was also plain string content (or this is the very first token). A tab
# advances the width to the next multiple of TAB_WIDTH; any other whitespace
# character counts as one.
def <<(token)
  event = token.event

  case event
  when :on_embexpr_beg, :on_heredoc_beg
    @embexpr_balance += 1
  when :on_embexpr_end, :on_heredoc_end
    @embexpr_balance -= 1
  when :on_tstring_content
    if embexpr_balance == 0
      token.value.split(/(?<=\n)/).each_with_index do |line, index|
        next if line.end_with?("\n") && line.strip.empty?
        next unless dedent_next || index > 0

        width = line[/\A(\s*)\n?/, 1].each_char.reduce(0) do |columns, char|
          char == "\t" ? columns - (columns % TAB_WIDTH) + TAB_WIDTH : columns + 1
        end

        @dedent = dedent.nil? ? width : [dedent, width].min
      end
    end
  end

  @dedent_next = event == :on_tstring_content && embexpr_balance == 0
  tokens.push(token)
end
# Returns the heredoc's tokens rewritten to match what Ripper emits for a
# dedenting heredoc. String content outside of embedded expressions is split
# into per-line tokens, and the common leading whitespace computed while the
# tokens were appended (dedent) is moved out of each line into a preceding
# on_ignored_sp token. Every other token is passed through untouched.
def to_a
# If every line in the heredoc is blank, we still need to split up the
# string content token into multiple tokens.
if dedent.nil?
results = []
embexpr_balance = 0
tokens.each do |token|
case token.event
when :on_embexpr_beg, :on_heredoc_beg
embexpr_balance += 1
results << token
when :on_embexpr_end, :on_heredoc_end
embexpr_balance -= 1
results << token
when :on_tstring_content
if embexpr_balance == 0
lineno = token[0][0]
column = token[0][1]
# One token per line; every line after the first starts at column 0.
token.value.split(/(?<=\n)/).each_with_index do |value, index|
column = 0 if index > 0
results << Token.new([[lineno, column], :on_tstring_content, value, token.state])
lineno += 1
end
else
results << token
end
else
results << token
end
end
return results
end
# Otherwise, we're going to run through each token in the list and
# insert on_ignored_sp tokens for the amount of dedent that we need to
# perform. We also need to remove the dedent from the beginning of
# each line of plain string content tokens.
results = []
# These locals shadow the readers of the same name on purpose: they track
# state for this pass only.
dedent_next = true
embexpr_balance = 0
tokens.each do |token|
# Notice that the structure of this conditional largely matches the
# whitespace calculation we performed above. This is because
# checking if the subsequent token needs to be dedented is common to
# both the dedent calculation and the ignored_sp insertion.
# NOTE(review): unlike the blank-heredoc branch above and #<<, this case
# does not adjust the balance for on_heredoc_beg/on_heredoc_end. Confirm
# that is intentional.
case token.event
when :on_embexpr_beg
embexpr_balance += 1
results << token
when :on_embexpr_end
embexpr_balance -= 1
results << token
when :on_tstring_content
if embexpr_balance == 0
# Here we're going to split the string on newlines, but maintain
# the newlines in the resulting array. We'll do that with a look
# behind assertion.
splits = token.value.split(/(?<=\n)/)
index = 0
while index < splits.length
line = splits[index]
lineno = token[0][0] + index
column = token[0][1]
# A piece that begins a physical line is reported at column 0. Only
# the first piece of a token that continues a line keeps the token's
# own column.
if dedent_next || index > 0
column = 0
end
# If the dedent is 0 and we're not supposed to dedent the next
# line or this line doesn't start with whitespace, then we
# should concatenate the rest of the string to match ripper.
if dedent == 0 && (!dedent_next || !line.start_with?(/\s/))
line = splits[index..].join
index = splits.length
end
# If we are supposed to dedent this line or if this is not the
# first line of the string and this line isn't entirely blank,
# then we need to insert an on_ignored_sp token and remove the
# dedent from the beginning of the line.
if (dedent > 0) && (dedent_next || index > 0)
deleting = 0
deleted_chars = []
# Gather up all of the characters that we're going to
# delete, stopping when you hit a character that would put
# you over the dedent amount.
# NOTE(review): line.chars builds a new array each time a "\r" is seen;
# line[i + 1] looks like it would read the same character.
line.each_char.with_index do |char, i|
case char
when "\r"
# A carriage return that is part of a line ending is never removed.
if line.chars[i + 1] == "\n"
break
end
when "\n"
break
when "\t"
# Tabs advance to the next multiple of TAB_WIDTH.
deleting = deleting - (deleting % TAB_WIDTH) + TAB_WIDTH
else
deleting += 1
end
break if deleting > dedent
deleted_chars << char
end
# If we have something to delete, then delete it from the
# string and insert an on_ignored_sp token.
if deleted_chars.any?
ignored = deleted_chars.join
line.delete_prefix!(ignored)
results << Token.new([[lineno, 0], :on_ignored_sp, ignored, token[3]])
column = ignored.length
end
end
# A line that consisted only of removed whitespace produces no content
# token at all.
results << Token.new([[lineno, column], token[1], line, token[3]]) unless line.empty?
index += 1
end
else
results << token
end
else
results << token
end
# The next token starts a fresh line (for dedent purposes) only when this
# one was string content or a heredoc end outside of any embedded
# expression.
dedent_next =
((token.event == :on_tstring_content) || (token.event == :on_heredoc_end)) &&
embexpr_balance == 0
end
results
end
end
# Picks the heredoc container that matches the opening token. The decision
# is made on the third character of the opener's text: "~" gives a
# DedentingHeredoc, "-" gives a DashHeredoc, and anything else gives a
# PlainHeredoc. A dash heredoc splits its content unless the character after
# the dash is a single quote.
def self.build(opening)
  opener = opening.value

  case opener[2]
  when "~" then DedentingHeredoc.new
  when "-" then DashHeredoc.new(opener[3] != "'")
  else PlainHeredoc.new
  end
end
end
attr_reader :source, :offsets, :filepath
# Remembers the source and file path (a nil path becomes the empty string)
# and precomputes the offset at which every line starts.
def initialize(source, filepath = "")
  @source, @filepath = source, (filepath || "")
  @offsets = find_offsets(source)
end
# Lexes the source with YARP and returns a ParseResult whose value is the
# list of tokens in Ripper's shape, [[lineno, column], event, value, state],
# and in Ripper's order. The comments, errors and warnings are passed through
# from the YARP lex result.
#
# Raises StandardError if more than one token was lost along the way.
def result
tokens = []
# Where we are relative to heredocs: :default, :heredoc_opened (collecting a
# heredoc body) or :heredoc_closed (body done, waiting for the end of the
# line that declared the heredoc).
state = :default
# A stack of lists of heredoc containers. A new list is pushed when a
# heredoc is declared inside the body of another one.
heredoc_stack = [[]]
result = YARP.lex(source, @filepath)
result_value = result.value
previous_state = nil
# If there's a UTF-8 byte-order mark at the start of the file, then ripper
# shifts the column of every token on the first line back: by 6 for the very
# first token and by 3 for the others. It also keeps the byte order mark in
# the first token's value. This is weird, and I don't want to mirror that in
# our parser. So instead, we'll match up the values here, and then match up
# the locations as we process the tokens.
bom = source.bytes[0..2] == [0xEF, 0xBB, 0xBF]
result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
result_value.each_with_index do |(token, lex_state), index|
(lineno, column) = find_location(token.location.start_offset)
column -= index == 0 ? 6 : 3 if bom && lineno == 1
# fetch raises KeyError for a token type that has no Ripper equivalent.
event = RIPPER.fetch(token.type)
value = token.value
lex_state = Ripper::Lexer::State.new(lex_state)
# Wrap the token in the class whose == knows which differences from
# Ripper are acceptable for this kind of event.
token =
case event
when :on___end__
EndContentToken.new([[lineno, column], event, value, lex_state])
when :on_comment
CommentToken.new([[lineno, column], event, value, lex_state])
when :on_heredoc_end
# Heredoc end tokens can be emitted in an odd order, so we don't
# want to bother comparing the state on them.
HeredocEndToken.new([[lineno, column], event, value, lex_state])
when :on_embexpr_end, :on_ident
if lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
# In the event that we're comparing identifiers, we're going to
# allow a little divergence. Ripper doesn't account for local
# variables introduced through named captures in regexes, and we
# do, which accounts for this difference.
IdentToken.new([[lineno, column], event, value, lex_state])
else
Token.new([[lineno, column], event, value, lex_state])
end
when :on_ignored_nl
# Ignored newlines can occasionally have a LABEL state attached to
# them which doesn't actually impact anything. We don't mirror that
# state so we ignore it.
IgnoredNewlineToken.new([[lineno, column], event, value, lex_state])
when :on_regexp_end
# On regex end, Ripper scans and then sets end state, so the ripper
# lexed output is begin, when it should be end. YARP sets lex state
# correctly to end state, but we want to be able to compare against
# Ripper's lexed state. So here, if it's a regexp end token, we
# output the state as the previous state, solely for the sake of
# comparison.
previous_token = result_value[index - 1][0]
lex_state =
if RIPPER.fetch(previous_token.type) == :on_embexpr_end
# If the previous token is embexpr_end, then we have to do even
# more processing. The end of an embedded expression sets the
# state to the state that it had at the beginning of the
# embedded expression. So we have to go and find that state and
# set it here.
# Walk backwards until the embexpr_beg that balances the embexpr_end
# right before this token.
counter = 1
current_index = index - 1
until counter == 0
current_index -= 1
current_event = RIPPER.fetch(result_value[current_index][0].type)
counter += { on_embexpr_beg: -1, on_embexpr_end: 1 }[current_event] || 0
end
Ripper::Lexer::State.new(result_value[current_index][1])
else
previous_state
end
Token.new([[lineno, column], event, value, lex_state])
else
Token.new([[lineno, column], event, value, lex_state])
end
previous_state = lex_state
# The order in which tokens appear in our lexer is different from the
# order that they appear in Ripper. When we hit the declaration of a
# heredoc in YARP, we skip forward and lex the rest of the content of
# the heredoc before going back and lexing at the end of the heredoc
# identifier.
#
# To match up to ripper, we keep a small state variable around here to
# track whether we're in the middle of a heredoc or not. In this way we
# can shuffle around the token to match Ripper's output.
case state
when :default
tokens << token
if event == :on_heredoc_beg
state = :heredoc_opened
heredoc_stack.last << Heredoc.build(token)
end
when :heredoc_opened
# Body tokens go into the most recently declared heredoc.
heredoc_stack.last.last << token
case event
when :on_heredoc_beg
heredoc_stack << [Heredoc.build(token)]
when :on_heredoc_end
state = :heredoc_closed
end
when :heredoc_closed
if %i[on_nl on_ignored_nl on_comment].include?(event) || (event == :on_tstring_content && value.end_with?("\n"))
# The declaring line is over. For a nested heredoc, fold its tokens
# into the enclosing heredoc and keep collecting that one. For an
# outermost heredoc, fall through to the flush below.
if heredoc_stack.size > 1
flushing = heredoc_stack.pop
heredoc_stack.last.last << token
flushing.each do |heredoc|
heredoc.to_a.each do |flushed_token|
heredoc_stack.last.last << flushed_token
end
end
state = :heredoc_opened
next
end
elsif event == :on_heredoc_beg
# Another heredoc declared on the same line.
tokens << token
state = :heredoc_opened
heredoc_stack.last << Heredoc.build(token)
next
elsif heredoc_stack.size > 1
heredoc_stack[-2].last << token
next
end
# Emit the collected heredoc bodies, then the current token.
heredoc_stack.last.each do |heredoc|
tokens.concat(heredoc.to_a)
end
heredoc_stack.last.clear
state = :default
tokens << token
end
end
tokens.reject! { |t| t.event == :on_eof }
# We sort by location to compare against Ripper's output
tokens.sort_by!(&:location)
# Dropping on_eof accounts for a difference of one; anything beyond that
# means tokens went missing during the reshuffling above.
if result_value.size - 1 > tokens.size
raise StandardError, "Lost tokens when performing lex_compat"
end
ParseResult.new(tokens, result.comments, result.errors, result.warnings)
end
private
# YARP tracks locations as byte offsets from the start of the file, while
# Ripper tracks them as line and column numbers. To convert between the two
# we build, once, the list of byte offsets at which each line begins. The
# list always starts with 0 and gains one entry per line, so its last entry
# is the total size of the source in bytes.
def find_offsets(source)
  source.each_line.each_with_object([0]) do |line, starts|
    starts << starts.last + line.bytesize
  end
end
# Converts a byte offset into a [line number, column] pair. A binary search
# over the cached line offsets finds the first line that starts after
# +value+; its index is the line number of the line containing +value+, and
# the column is the distance from that line's own starting offset. When no
# line starts after +value+, the index of the last cached offset is used as
# the line number and the column is measured from that last offset.
def find_location(value)
  following = offsets.bsearch_index { |offset| offset > value }
  return [offsets.length - 1, value - offsets.last] if following.nil?

  line_start = offsets[following - 1]
  [following, value - (line_start || offsets.last)]
end
end
# The constant that wraps the behavior of the lexer to match Ripper's output
# is an implementation detail, so we don't want it to be public.
private_constant :LexCompat
# Lexes +source+ with YARP and returns the outcome of LexCompat#result: a
# result whose tokens are shaped and ordered to closely resemble those of the
# Ripper lexer. Each token carries a Ripper::Lexer::State built from the
# lexer state that YARP reports for it.
def self.lex_compat(source, filepath = "") = LexCompat.new(source, filepath).result
end

6434
lib/yarp/node.rb Normal file

File diff suppressed because it is too large Load Diff

185
lib/yarp/pack.rb Normal file
View File

@ -0,0 +1,185 @@
# frozen_string_literal: true
module YARP
module Pack
# Define one constant per name below, each holding the symbol of the same
# name (so SPACE == :SPACE). Directive#describe compares a directive's fields
# against these constants, and they are also the keys of Directive's
# description tables.
%i[
SPACE
COMMENT
INTEGER
UTF8
BER
FLOAT
STRING_SPACE_PADDED
STRING_NULL_PADDED
STRING_NULL_TERMINATED
STRING_MSB
STRING_LSB
STRING_HEX_HIGH
STRING_HEX_LOW
STRING_UU
STRING_MIME
STRING_BASE64
STRING_FIXED
STRING_POINTER
MOVE
BACK
NULL
UNSIGNED
SIGNED
SIGNED_NA
AGNOSTIC_ENDIAN
LITTLE_ENDIAN
BIG_ENDIAN
NATIVE_ENDIAN
ENDIAN_NA
SIZE_SHORT
SIZE_INT
SIZE_LONG
SIZE_LONG_LONG
SIZE_8
SIZE_16
SIZE_32
SIZE_64
SIZE_P
SIZE_NA
LENGTH_FIXED
LENGTH_MAX
LENGTH_RELATIVE
LENGTH_NA
].each do |const|
const_set(const, const)
end
class Directive
attr_reader :version, :variant, :source, :type, :signed, :endian, :size, :length_type, :length
# Stores every field of the directive exactly as given. Each one is exposed
# through the reader of the same name.
def initialize(version, variant, source, type, signed, endian, size, length_type, length)
  @version, @variant, @source = version, variant, source
  @type, @signed, @endian, @size = type, signed, endian, size
  @length_type, @length = length_type, length
end
# Human-readable fragments used by #describe, keyed by the symbols that a
# directive's endian, signed and size fields hold. The tables are frozen so
# that these shared constants cannot be modified by accident.

# Byte order of a numeric directive.
ENDIAN_DESCRIPTIONS = {
  AGNOSTIC_ENDIAN: 'agnostic',
  LITTLE_ENDIAN: 'little-endian (VAX)',
  BIG_ENDIAN: 'big-endian (network)',
  NATIVE_ENDIAN: 'native-endian',
  ENDIAN_NA: 'n/a'
}.freeze

# Signedness of an integer directive.
SIGNED_DESCRIPTIONS = {
  UNSIGNED: 'unsigned',
  SIGNED: 'signed',
  SIGNED_NA: 'n/a'
}.freeze

# Width of a numeric directive. SIZE_NA has no entry here.
SIZE_DESCRIPTIONS = {
  SIZE_SHORT: 'short',
  SIZE_INT: 'int-width',
  SIZE_LONG: 'long',
  SIZE_LONG_LONG: 'long long',
  SIZE_8: '8-bit',
  SIZE_16: '16-bit',
  SIZE_32: '32-bit',
  SIZE_64: '64-bit',
  SIZE_P: 'pointer-width'
}.freeze
# Returns a human-readable description of this directive, chosen by its
# type.
#
# Integer directives are described by signedness, size and byte order (the
# byte order is left out for 8-bit values), followed by a repeat count or
# "as many as possible" when the length calls for it. Float directives are
# described by size and byte order. Every other type has a fixed
# description.
#
# Raises RuntimeError, naming the offending type, when the type is not one
# of the known directive types.
def describe
  case type
  when SPACE
    'whitespace'
  when COMMENT
    'comment'
  when INTEGER
    base =
      if size == SIZE_8
        "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} integer"
      else
        "#{SIGNED_DESCRIPTIONS[signed]} #{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} integer"
      end

    case length_type
    when LENGTH_FIXED
      length > 1 ? base + ", x#{length}" : base
    when LENGTH_MAX
      base + ', as many as possible'
    else
      # Any other length type adds nothing to the description. Returning
      # the base text keeps this method from ever returning nil.
      base
    end
  when UTF8
    'UTF-8 character'
  when BER
    'BER-compressed integer'
  when FLOAT
    "#{SIZE_DESCRIPTIONS[size]} #{ENDIAN_DESCRIPTIONS[endian]} float"
  when STRING_SPACE_PADDED
    'arbitrary binary string (space padded)'
  when STRING_NULL_PADDED
    'arbitrary binary string (null padded, count is width)'
  when STRING_NULL_TERMINATED
    'arbitrary binary string (null padded, count is width), except that null is added with *'
  when STRING_MSB
    'bit string (MSB first)'
  when STRING_LSB
    'bit string (LSB first)'
  when STRING_HEX_HIGH
    'hex string (high nibble first)'
  when STRING_HEX_LOW
    'hex string (low nibble first)'
  when STRING_UU
    'UU-encoded string'
  when STRING_MIME
    'quoted printable, MIME encoding'
  when STRING_BASE64
    'base64 encoded string'
  when STRING_FIXED
    'pointer to a structure (fixed-length string)'
  when STRING_POINTER
    'pointer to a null-terminated string'
  when MOVE
    'move to absolute position'
  when BACK
    'back up a byte'
  when NULL
    'null byte'
  else
    raise "unknown pack directive type: #{type.inspect}"
  end
end
end
# A parsed pack/unpack format: the list of directives it is made of and the
# encoding associated with it.
class Format
  attr_reader :directives, :encoding

  def initialize(directives, encoding)
    @directives, @encoding = directives, encoding
  end

  # Returns a multi-line, human-readable listing with one line per directive
  # (its source text padded to a common width, followed by its description)
  # and then the encoding. The source of a whitespace directive is shown in
  # its inspected form so that it stays visible.
  def describe
    source_width = directives.map { |d| d.source.inspect.length }.max

    lines = ['Directives:']
    directives.each do |directive|
      label = directive.type == SPACE ? directive.source.inspect : directive.source
      lines << " #{label.ljust(source_width)} #{directive.describe}"
    end
    lines.push('Encoding:', " #{encoding}")

    lines.join("\n")
  end
end
end
end

174
lib/yarp/ripper_compat.rb Normal file
View File

@ -0,0 +1,174 @@
# frozen_string_literal: true
require "ripper"
module YARP
# This class is meant to provide a compatibility layer between YARP and
# Ripper. It functions by parsing the entire tree first and then walking it
# and executing each of the Ripper callbacks as it goes.
#
# This class is going to necessarily be slower than the native Ripper API. It
# is meant as a stopgap until developers migrate to using YARP. It is also
# meant as a test harness for the YARP parser.
class RipperCompat
  # This class mirrors the ::Ripper::SexpBuilder subclass of ::Ripper that
  # returns the arrays of [type, *children].
  class SexpBuilder < RipperCompat
    private

    # Every parser event returns [event, *children].
    Ripper::PARSER_EVENTS.each do |event|
      define_method(:"on_#{event}") do |*args|
        [event, *args]
      end
    end

    # Every scanner event returns [:@event, value, [lineno, column]], using
    # the position most recently recorded by #bounds.
    Ripper::SCANNER_EVENTS.each do |event|
      define_method(:"on_#{event}") do |value|
        [:"@#{event}", value, [lineno, column]]
      end
    end
  end

  # This class mirrors the ::Ripper::SexpBuilderPP subclass of ::Ripper that
  # returns the same values as ::Ripper::SexpBuilder except with a couple of
  # niceties that flatten linked lists into arrays.
  class SexpBuilderPP < SexpBuilder
    private

    # Handler for zero-arity `*_new` events: starts an empty list.
    def _dispatch_event_new
      []
    end

    # Handler for `*_add` events: appends +item+ and returns the same list.
    def _dispatch_event_push(list, item)
      list << item
      list
    end

    Ripper::PARSER_EVENT_TABLE.each do |event, arity|
      case event
      when /_new\z/
        alias :"on_#{event}" :_dispatch_event_new if arity == 0
      when /_add\z/
        alias :"on_#{event}" :_dispatch_event_push
      end
    end
  end

  attr_reader :source, :lineno, :column

  def initialize(source)
    @source = source
    @result = nil
    @lineno = nil
    @column = nil
  end

  ############################################################################
  # Public interface
  ############################################################################

  # True when parsing the source produced at least one error.
  def error?
    result.errors.any?
  end

  # Walks the parsed tree with this object as the visitor and returns what the
  # root node's callback returned, or nil when the source has errors.
  def parse
    result.value.accept(self) unless error?
  end

  ############################################################################
  # Visitor methods
  ############################################################################

  def visit(node)
    node&.accept(self)
  end

  # Only binary operator calls are supported: a receiver, no opening
  # parenthesis and exactly one argument. Everything else raises
  # NotImplementedError. The receiver and the argument list can both be
  # absent, so they are checked before being dereferenced.
  def visit_call_node(node)
    arguments = node.arguments&.arguments

    if node.receiver && !node.opening_loc && arguments&.length == 1
      bounds(node.receiver.location)
      left = visit(node.receiver)

      bounds(arguments.first.location)
      right = visit(arguments.first)

      on_binary(left, source[node.message_loc.start_offset...node.message_loc.end_offset].to_sym, right)
    else
      raise NotImplementedError
    end
  end

  def visit_integer_node(node)
    bounds(node.location)
    on_int(source[node.location.start_offset...node.location.end_offset])
  end

  # Folds the statements into the list built by on_stmts_new / on_stmts_add.
  def visit_statements_node(node)
    bounds(node.location)
    node.body.inject(on_stmts_new) do |stmts, stmt|
      on_stmts_add(stmts, visit(stmt))
    end
  end

  def visit_token(node)
    bounds(node.location)

    case node.type
    when :MINUS
      on_op(node.value)
    when :PLUS
      on_op(node.value)
    else
      raise NotImplementedError, "Unknown token: #{node.type}"
    end
  end

  def visit_program_node(node)
    bounds(node.location)
    on_program(visit(node.statements))
  end

  ############################################################################
  # Entrypoints for subclasses
  ############################################################################

  # This is a convenience method that runs the SexpBuilder subclass parser.
  def self.sexp_raw(source)
    SexpBuilder.new(source).parse
  end

  # This is a convenience method that runs the SexpBuilderPP subclass parser.
  def self.sexp(source)
    SexpBuilderPP.new(source).parse
  end

  private

  # This method is responsible for updating lineno and column information
  # to reflect the current node.
  #
  # lineno is 1-based and column is 0-based on every line: the column is the
  # distance from the character just after the preceding newline, or from the
  # start of the source on the first line.
  #
  # This method could be drastically improved with some caching on the start
  # of every line, but for now it's good enough.
  #
  # NOTE(review): offsets are used as String character indices here, as in the
  # visitor methods above -- confirm that holds for multibyte sources.
  def bounds(location)
    start_offset = location.start_offset

    # Only text strictly before the node decides which line it is on.
    preceding = source[0...start_offset]
    last_newline = preceding.rindex("\n")

    @lineno = preceding.count("\n") + 1
    @column = last_newline ? start_offset - last_newline - 1 : start_offset
  end

  # Parses the source on first use and memoizes the result.
  def result
    @result ||= YARP.parse(source)
  end

  # Default no-op handlers, one per arity, that the on_* aliases below point
  # at. Every arity from 0 to 7 is defined so that the alias loop cannot hit a
  # missing target while the class is being defined.
  def _dispatch0; end
  def _dispatch1(_); end
  def _dispatch2(_, _); end
  def _dispatch3(_, _, _); end
  def _dispatch4(_, _, _, _); end
  def _dispatch5(_, _, _, _, _); end
  def _dispatch6(_, _, _, _, _, _); end
  def _dispatch7(_, _, _, _, _, _, _); end

  (Ripper::SCANNER_EVENT_TABLE.merge(Ripper::PARSER_EVENT_TABLE)).each do |event, arity|
    alias :"on_#{event}" :"_dispatch#{arity}"
  end
end
end

367
lib/yarp/serialize.rb Normal file
View File

@ -0,0 +1,367 @@
# frozen_string_literal: true
=begin
This file is generated by the bin/template script and should not be
modified manually. See templates/lib/yarp/serialize.rb.erb
if you are looking to modify the template
=end
require "stringio"
module YARP
module Serialize
# Wraps +serialized+ in a binary-encoded StringIO and hands it, together with
# +source+ and the raw +serialized+ string, to a Loader; returns what
# Loader#load returns.
def self.load(source, serialized)
  reader = StringIO.new(serialized).tap { |io| io.set_encoding(Encoding::BINARY) }
  Loader.new(source, serialized, reader).load
end
# Rebuilds a node tree from its binary serialization. The bytes are consumed
# sequentially through +io+; +serialized+ is additionally indexed directly to
# resolve constant pool entries, and +source+ supplies the constant names.
#
# NOTE: the header of this file states that it is generated from
# templates/lib/yarp/serialize.rb.erb, so any change made here needs to be
# mirrored in that template.
class Loader
  attr_reader :encoding, :source, :serialized, :io
  attr_reader :constant_pool_offset, :constant_pool

  def initialize(source, serialized, io)
    @encoding = Encoding::UTF_8

    # Work on a copy: #load re-tags and freezes the source.
    @source = source.dup
    @serialized = serialized
    @io = io

    @constant_pool_offset = nil
    @constant_pool = nil
  end

  # Reads the header and then the root node, which is returned.
  #
  # The header is the "YARP" magic bytes, the version bytes 0, 4, 0, the
  # length-prefixed source encoding name, the constant pool offset and the
  # constant pool size. Raises NoMatchingPatternError when the magic bytes or
  # the version bytes differ.
  def load
    io.read(4) => "YARP"
    io.read(3).unpack("C3") => [0, 4, 0]

    @encoding = Encoding.find(io.read(load_varint))
    @source = source.force_encoding(@encoding).freeze

    @constant_pool_offset = io.read(4).unpack1("L")
    @constant_pool = Array.new(load_varint, nil)

    load_node
  end

  private

  # variable-length integer using https://en.wikipedia.org/wiki/LEB128
  # This is also what protobuf uses: https://protobuf.dev/programming-guides/encoding/#varints
  def load_varint
    n = io.getbyte
    if n < 128
      n
    else
      n -= 128
      shift = 0
      while (b = io.getbyte) >= 128
        n += (b - 128) << (shift += 7)
      end
      n + (b << (shift + 7))
    end
  end

  # Reads a 4-byte integer with the "L" unpack directive.
  def load_serialized_length
    io.read(4).unpack1("L")
  end

  # A zero byte means "no node"; any other byte is the start of a node, so it
  # is pushed back by rewinding one position before loading.
  def load_optional_node
    if io.getbyte != 0
      io.pos -= 1
      load_node
    end
  end

  # Reads a length-prefixed string and tags it with the source encoding.
  def load_string
    io.read(load_varint).force_encoding(encoding)
  end

  # Reads a start offset followed by a length.
  def load_location
    Location.new(load_varint, load_varint)
  end

  # A zero byte means "no location"; any other byte is consumed as the marker
  # and the location follows it.
  def load_optional_location
    load_location if io.getbyte != 0
  end

  # Reads a 1-based constant pool index and returns the matching symbol. Pool
  # entries are resolved on first use: each entry in +serialized+ is 8 bytes
  # (start, then length) describing a byte range of +source+.
  def load_constant
    index = load_varint - 1
    constant = constant_pool[index]

    unless constant
      offset = constant_pool_offset + index * 8

      start = serialized.unpack1("L", offset: offset)
      length = serialized.unpack1("L", offset: offset + 4)

      constant = source.byteslice(start, length).to_sym
      constant_pool[index] = constant
    end

    constant
  end

  # Reads one node: a type byte, the node's start offset and length, and then
  # the type-specific fields in the order the node's constructor takes them.
  #
  # Raises RuntimeError for a type byte that matches no node, instead of
  # silently placing nil in the tree.
  def load_node
    type = io.getbyte
    start_offset, length = load_varint, load_varint

    case type
    when 1 then
      AliasNode.new(load_node, load_node, load_location, start_offset, length)
    when 2 then
      AlternationPatternNode.new(load_node, load_node, load_location, start_offset, length)
    when 3 then
      AndNode.new(load_node, load_node, load_location, start_offset, length)
    when 4 then
      ArgumentsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
    when 5 then
      ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
    when 6 then
      ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
    when 7 then
      AssocNode.new(load_node, load_optional_node, load_optional_location, start_offset, length)
    when 8 then
      AssocSplatNode.new(load_optional_node, load_location, start_offset, length)
    when 9 then
      BackReferenceReadNode.new(start_offset, length)
    when 10 then
      BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
    when 11 then
      BlockArgumentNode.new(load_optional_node, load_location, start_offset, length)
    when 12 then
      BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, start_offset, length)
    when 13 then
      BlockParameterNode.new(load_optional_location, load_location, start_offset, length)
    when 14 then
      BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, start_offset, length)
    when 15 then
      BreakNode.new(load_optional_node, load_location, start_offset, length)
    when 16 then
      CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, start_offset, length)
    when 17 then
      CallOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
    when 18 then
      CallOperatorOrWriteNode.new(load_node, load_node, load_location, start_offset, length)
    when 19 then
      CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
    when 20 then
      CapturePatternNode.new(load_node, load_node, load_location, start_offset, length)
    when 21 then
      CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, start_offset, length)
    when 22 then
      ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, start_offset, length)
    when 23 then
      ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
    when 24 then
      ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
    when 25 then
      ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
    when 26 then
      ClassVariableReadNode.new(start_offset, length)
    when 27 then
      ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
    when 28 then
      ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
    when 29 then
      ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
    when 30 then
      ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
    when 31 then
      ConstantPathNode.new(load_optional_node, load_node, load_location, start_offset, length)
    when 32 then
      ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
    when 33 then
      ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, start_offset, length)
    when 34 then
      ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
    when 35 then
      ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, start_offset, length)
    when 36 then
      ConstantReadNode.new(start_offset, length)
    when 37 then
      # The serialized length that precedes a DefNode is read and discarded.
      load_serialized_length
      DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, start_offset, length)
    when 38 then
      DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, start_offset, length)
    when 39 then
      ElseNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
    when 40 then
      EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, start_offset, length)
    when 41 then
      EmbeddedVariableNode.new(load_location, load_node, start_offset, length)
    when 42 then
      EnsureNode.new(load_location, load_optional_node, load_location, start_offset, length)
    when 43 then
      FalseNode.new(start_offset, length)
    when 44 then
      FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, start_offset, length)
    when 45 then
      FloatNode.new(start_offset, length)
    when 46 then
      ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, start_offset, length)
    when 47 then
      ForwardingArgumentsNode.new(start_offset, length)
    when 48 then
      ForwardingParameterNode.new(start_offset, length)
    when 49 then
      ForwardingSuperNode.new(load_optional_node, start_offset, length)
    when 50 then
      GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
    when 51 then
      GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
    when 52 then
      GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
    when 53 then
      GlobalVariableReadNode.new(start_offset, length)
    when 54 then
      GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, start_offset, length)
    when 55 then
      HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
    when 56 then
      HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
    when 57 then
      IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
    when 58 then
      ImaginaryNode.new(load_node, start_offset, length)
    when 59 then
      InNode.new(load_node, load_optional_node, load_location, load_optional_location, start_offset, length)
    when 60 then
      InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
    when 61 then
      InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
    when 62 then
      InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
    when 63 then
      InstanceVariableReadNode.new(start_offset, length)
    when 64 then
      InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
    when 65 then
      IntegerNode.new(start_offset, length)
    when 66 then
      InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, start_offset, length)
    when 67 then
      InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
    when 68 then
      InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
    when 69 then
      InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
    when 70 then
      KeywordHashNode.new(Array.new(load_varint) { load_node }, start_offset, length)
    when 71 then
      KeywordParameterNode.new(load_location, load_optional_node, start_offset, length)
    when 72 then
      KeywordRestParameterNode.new(load_location, load_optional_location, start_offset, length)
    when 73 then
      LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, start_offset, length)
    when 74 then
      LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
    when 75 then
      LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
    when 76 then
      LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, start_offset, length)
    when 77 then
      LocalVariableReadNode.new(load_constant, load_varint, start_offset, length)
    when 78 then
      LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, start_offset, length)
    when 79 then
      MatchPredicateNode.new(load_node, load_node, load_location, start_offset, length)
    when 80 then
      MatchRequiredNode.new(load_node, load_node, load_location, start_offset, length)
    when 81 then
      MissingNode.new(start_offset, length)
    when 82 then
      ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, start_offset, length)
    when 83 then
      MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
    when 84 then
      NextNode.new(load_optional_node, load_location, start_offset, length)
    when 85 then
      NilNode.new(start_offset, length)
    when 86 then
      NoKeywordsParameterNode.new(load_location, load_location, start_offset, length)
    when 87 then
      NumberedReferenceReadNode.new(start_offset, length)
    when 88 then
      OptionalParameterNode.new(load_constant, load_location, load_location, load_node, start_offset, length)
    when 89 then
      OrNode.new(load_node, load_node, load_location, start_offset, length)
    when 90 then
      ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, start_offset, length)
    when 91 then
      ParenthesesNode.new(load_optional_node, load_location, load_location, start_offset, length)
    when 92 then
      PinnedExpressionNode.new(load_node, load_location, load_location, load_location, start_offset, length)
    when 93 then
      PinnedVariableNode.new(load_node, load_location, start_offset, length)
    when 94 then
      PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
    when 95 then
      PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
    when 96 then
      ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, start_offset, length)
    when 97 then
      RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, start_offset, length)
    when 98 then
      RationalNode.new(load_node, start_offset, length)
    when 99 then
      RedoNode.new(start_offset, length)
    when 100 then
      RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, start_offset, length)
    when 101 then
      RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, start_offset, length)
    when 102 then
      RequiredParameterNode.new(load_constant, start_offset, length)
    when 103 then
      RescueModifierNode.new(load_node, load_location, load_node, start_offset, length)
    when 104 then
      RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, start_offset, length)
    when 105 then
      RestParameterNode.new(load_location, load_optional_location, start_offset, length)
    when 106 then
      RetryNode.new(start_offset, length)
    when 107 then
      ReturnNode.new(load_location, load_optional_node, start_offset, length)
    when 108 then
      SelfNode.new(start_offset, length)
    when 109 then
      SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, start_offset, length)
    when 110 then
      SourceEncodingNode.new(start_offset, length)
    when 111 then
      SourceFileNode.new(load_string, start_offset, length)
    when 112 then
      SourceLineNode.new(start_offset, length)
    when 113 then
      SplatNode.new(load_location, load_optional_node, start_offset, length)
    when 114 then
      StatementsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
    when 115 then
      StringConcatNode.new(load_node, load_node, start_offset, length)
    when 116 then
      StringNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
    when 117 then
      SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, start_offset, length)
    when 118 then
      SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
    when 119 then
      TrueNode.new(start_offset, length)
    when 120 then
      UndefNode.new(Array.new(load_varint) { load_node }, load_location, start_offset, length)
    when 121 then
      UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
    when 122 then
      UntilNode.new(load_location, load_node, load_optional_node, start_offset, length)
    when 123 then
      WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, start_offset, length)
    when 124 then
      WhileNode.new(load_location, load_node, load_optional_node, start_offset, length)
    when 125 then
      XStringNode.new(load_location, load_location, load_location, load_string, start_offset, length)
    when 126 then
      YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, start_offset, length)
    else
      # Fail loudly on corrupt or unsupported input, as #load does for a bad
      # header.
      raise "Unknown node type: #{type}"
    end
  end
end
end
end

View File

@ -0,0 +1,47 @@
# frozen_string_literal: true
require "yarp_test_helper"
class CommentsTest < Test::Unit::TestCase
include ::YARP::DSL
# A `#` comment is expected to be reported with the :inline type.
def test_comment_inline
  source = "# comment"
  assert_comment(source, :inline)
end
# Everything from an `__END__` line onward is expected to be reported as a
# comment of type :__END__.
def test_comment___END__
  assert_comment("__END__\ncomment\n", :__END__)
end
# A `=begin` ... `=end` block is expected to be reported with the :embdoc
# type.
def test_comment_embedded_document
  assert_comment("=begin\ncomment\n=end\n", :embdoc)
end
# Text following `=begin` on the same line is still expected to yield an
# :embdoc comment.
def test_comment_embedded_document_with_content_on_same_line
  assert_comment("=begin other stuff\n=end\n", :embdoc)
end
private
# Parses +source+, fails with the parser's messages if any errors were
# reported, and then requires the result to hold exactly one comment whose
# type equals +type+ (NoMatchingPatternError is raised otherwise).
def assert_comment(source, type)
  result = YARP.parse(source)
  assert result.errors.empty?, result.errors.map(&:message).join("\n")

  # The expected type must be pinned: a bare `type` inside the pattern would
  # rebind the local and match a comment of any type.
  result => YARP::ParseResult[comments: [YARP::Comment[type: ^type]]]
end
end

212
test/yarp/compile_test.rb Normal file
View File

@ -0,0 +1,212 @@
# frozen_string_literal: true
require "yarp_test_helper"
# Compares the instruction sequences returned by YARP.compile with the ones
# RubyVM::InstructionSequence.compile returns for the same small snippets.
class CompileTest < Test::Unit::TestCase
  def test_AliasNode
    assert_compiles "alias foo bar"
  end

  def test_AndNode
    assert_compiles "true && false"
  end

  def test_ArrayNode
    ["[]", "[foo, bar, baz]"].each { |source| assert_compiles source }
  end

  def test_AssocNode
    assert_compiles "{ foo: bar }"
  end

  def test_BlockNode
    assert_compiles "foo { bar }"
  end

  def test_BlockNode_with_optionals
    assert_compiles "foo { |x = 1| bar }"
  end

  def test_CallNode
    ["foo", "foo(bar)"].each { |source| assert_compiles source }
  end

  def test_ClassVariableReadNode
    assert_compiles "@@foo"
  end

  def test_ClassVariableWriteNode
    assert_compiles "@@foo = 1"
  end

  def test_FalseNode
    assert_compiles "false"
  end

  def test_GlobalVariableReadNode
    assert_compiles "$foo"
  end

  def test_GlobalVariableWriteNode
    assert_compiles "$foo = 1"
  end

  def test_HashNode
    assert_compiles "{ foo: bar }"
  end

  def test_InstanceVariableReadNode
    assert_compiles "@foo"
  end

  def test_InstanceVariableWriteNode
    assert_compiles "@foo = 1"
  end

  def test_IntegerNode
    ["1", "1_000"].each { |source| assert_compiles source }
  end

  def test_InterpolatedStringNode
    assert_compiles "\"foo \#{bar} baz\""
  end

  def test_LocalVariableWriteNode
    assert_compiles "foo = 1"
  end

  def test_LocalVariableReadNode
    assert_compiles "[foo = 1, foo]"
  end

  def test_NilNode
    assert_compiles "nil"
  end

  def test_OrNode
    assert_compiles "true || false"
  end

  def test_ParenthesesNode
    assert_compiles "()"
  end

  def test_ProgramNode
    assert_compiles ""
  end

  def test_RangeNode
    [
      "foo..bar",
      "foo...bar",
      "(foo..)",
      "(foo...)",
      "(..bar)",
      "(...bar)"
    ].each { |source| assert_compiles source }
  end

  def test_SelfNode
    assert_compiles "self"
  end

  def test_StringNode
    assert_compiles "\"foo\""
  end

  def test_SymbolNode
    assert_compiles ":foo"
  end

  def test_TrueNode
    assert_compiles "true"
  end

  def test_UndefNode
    assert_compiles "undef :foo, :bar, :baz"
  end

  def test_XStringNode
    assert_compiles "`foo`"
  end

  private

  # Compiles +source+ both ways and compares the supported parts of the two
  # instruction sequence arrays.
  def assert_compiles(source)
    reference = rubyvm_compile(source)
    assert_equal_iseqs(reference, YARP.compile(source))
  end

  # Instruction sequences have 13 elements in their lists. We don't currently
  # support all of the fields, so we can't compare the iseqs directly. Instead,
  # we compare the elements that we do support.
  def assert_equal_iseqs(expected, actual)
    # Index 0: the magic comment string.
    assert_equal expected[0], actual[0]

    # Indexes 1..3: the major, minor, and patch version numbers.
    # TODO: Insert this check once Ruby 3.3 is released, and the TruffleRuby
    # GitHub workflow also checks against Ruby 3.3
    # assert_equal expected[1...4], actual[1...4]

    # Index 4: the iseq options. Only part of this hash is supported, so some
    # entries are compared by value, some only by class, some only by length.
    expected_options = expected[4]
    actual_options = actual[4]
    %i[arg_size stack_max].each do |key|
      assert_equal expected_options[key], actual_options[key], "Unexpected difference in #{key}"
    end
    %i[local_size node_id].each do |key|
      assert_kind_of Integer, actual_options[key]
    end
    %i[code_location node_ids].each do |key|
      assert_equal expected_options[key].length, actual_options[key].length, "Unexpected difference in #{key} length"
    end

    # Indexes 5..8: iseq name, relative path, absolute path and line number.
    # These are only checked for their class because they don't work yet.
    [5, 6, 7].each { |index| assert_kind_of String, actual[index] }
    assert_kind_of Integer, actual[8]

    # Index 9: the type of the iseq.
    assert_equal expected[9], actual[9]

    # Index 10: the list of local variables, not supported yet.
    assert_kind_of Array, actual[10]

    # Index 11: the argument options used by block and method iseqs.
    assert_equal expected[11], actual[11], "Unexpected difference in argument options"

    # Index 12: the catch table entries, not working yet.
    assert_kind_of Array, actual[12]

    # Index 13: the instructions. Line numbers and some tracepoint events in
    # the expected list are skipped; everything else is compared against the
    # next instruction taken off the front of the actual list.
    actual_insns = actual[13]
    expected[13].each do |expected_insn|
      case expected_insn
      in [:send, opnds, expected_block] unless expected_block.nil?
        actual_insns.shift => [:send, ^(opnds), actual_block]
        assert_equal_iseqs expected_block, actual_block
      in Array | :RUBY_EVENT_B_CALL | :RUBY_EVENT_B_RETURN | /^label_\d+/
        assert_equal expected_insn, actual_insns.shift
      in Integer | /^RUBY_EVENT_/
        # skip these for now
      else
        flunk "Unexpected instruction: #{expected_insn.inspect}"
      end
    end
  end

  # Compiles +source+ with RubyVM, with the listed compile options switched
  # off, and returns the array form of the instruction sequence.
  def rubyvm_compile(source)
    iseq = RubyVM::InstructionSequence.compile(
      source,
      peephole_optimization: false,
      specialized_instruction: false,
      operands_unification: false,
      instructions_unification: false,
      frozen_string_literal: false
    )
    iseq.to_a
  end
end

View File

@ -0,0 +1,70 @@
# frozen_string_literal: true
require "yarp_test_helper"
# Checks that the encoding named in a magic comment ends up as the encoding of
# the name of the first parsed statement.
class EncodingTest < Test::Unit::TestCase
  # One generated test per encoding name.
  %w[
    ascii
    ascii-8bit
    big5
    binary
    euc-jp
    gbk
    iso-8859-1
    iso-8859-2
    iso-8859-3
    iso-8859-4
    iso-8859-5
    iso-8859-6
    iso-8859-7
    iso-8859-8
    iso-8859-9
    iso-8859-10
    iso-8859-11
    iso-8859-13
    iso-8859-14
    iso-8859-15
    iso-8859-16
    koi8-r
    shift_jis
    sjis
    us-ascii
    utf-8
    windows-31j
    windows-1251
    windows-1252
    CP1251
    CP1252
  ].each do |encoding|
    define_method "test_encoding_#{encoding}" do
      assert_identifier_encoding encoding, "# encoding: #{encoding}\nident"
    end
  end

  def test_coding
    assert_identifier_encoding "utf-8", "# coding: utf-8\nident"
  end

  def test_emacs_style
    assert_identifier_encoding "utf-8", "# -*- coding: utf-8 -*-\nident"
  end

  # Each of these spellings is expected to resolve to plain utf-8.
  def test_utf_8_variations
    %w[
      utf-8-unix
      utf-8-dos
      utf-8-mac
      utf-8-*
    ].each do |encoding|
      assert_identifier_encoding "utf-8", "# coding: #{encoding}\nident"
    end
  end

  private

  # Parses +source+ and asserts that the name of its first statement carries
  # the encoding that Encoding.find returns for +expected_name+.
  def assert_identifier_encoding(expected_name, source)
    result = YARP.parse(source)
    actual = result.value.statements.body.first.name.encoding
    assert_equal Encoding.find(expected_name), actual
  end
end

984
test/yarp/errors_test.rb Normal file
View File

@ -0,0 +1,984 @@
# frozen_string_literal: true
require "yarp_test_helper"
class ErrorsTest < Test::Unit::TestCase
include ::YARP::DSL
# `A::$b` is expected to report these two error messages.
def test_constant_path_with_invalid_token_after
  messages = [
    "Expected identifier or constant after '::'",
    "Expected a newline or semicolon after statement."
  ]
  assert_error_messages("A::$b", messages)
end
# A nested `module` with no name is expected to produce a module whose name is
# a MissingNode, inside the body of the outer module.
def test_module_name_recoverable
  nameless = ModuleNode([], Location(), MissingNode(), nil, Location())
  expected = ModuleNode([], Location(), ConstantReadNode(), StatementsNode([nameless]), Location())

  messages = ["Expected to find a module name after `module`."]
  assert_errors expected, "module Parent module end", messages
end
# A `for` loop with no index is expected to have a MissingNode as its index.
def test_for_loops_index_missing
  collection = expression("1..10")
  body = StatementsNode([expression("i")])
  expected = ForNode(MissingNode(), collection, body, Location(), Location(), nil, Location())

  assert_errors expected, "for in 1..10\ni\nend", ["Expected index after for."]
end
# `for end` is expected to have MissingNodes for both index and collection,
# and no body.
def test_for_loops_only_end
  expected = ForNode(MissingNode(), MissingNode(), nil, Location(), Location(), nil, Location())
  messages = ["Expected index after for.", "Expected keyword in.", "Expected collection."]

  assert_errors expected, "for end", messages
end
# `BEGIN` without its opening brace is still expected to yield a
# PreExecutionNode wrapping the statement.
def test_pre_execution_missing_brace
  body = StatementsNode([expression("1")])
  expected = PreExecutionNode(body, Location(), Location(), Location())

  assert_errors expected, "BEGIN 1 }", ["Expected '{' after 'BEGIN'."]
end
# Inside a BEGIN block, `1 +` is expected to become a call to "+" whose single
# argument is a MissingNode.
def test_pre_execution_context
  incomplete_call = CallNode(
    expression("1"),
    nil,
    Location(),
    nil,
    ArgumentsNode([MissingNode()]),
    nil,
    nil,
    0,
    "+"
  )
  expected = PreExecutionNode(StatementsNode([incomplete_call]), Location(), Location(), Location())

  assert_errors expected, "BEGIN { 1 + }", ["Expected a value after the operator."]
end
# A `=begin` with no `=end` is expected to report an error while the preceding
# `1` still parses.
def test_unterminated_embdoc
  source = "1\n=begin\n"
  assert_errors expression("1"), source, ["Unterminated embdoc"]
end
# An unclosed `%i[` list is expected to report a missing closing delimiter.
def test_unterminated_i_list
  source = "%i["
  assert_errors expression(source), source, ["Expected a closing delimiter for a `%i` list."]
end
# An unclosed `%w[` list is expected to report a missing closing delimiter.
def test_unterminated_w_list
  source = "%w["
  assert_errors expression(source), source, ["Expected a closing delimiter for a `%w` list."]
end
# An unclosed `%W[` list is expected to report a missing closing delimiter.
def test_unterminated_W_list
  source = "%W["
  assert_errors expression(source), source, ["Expected a closing delimiter for a `%W` list."]
end
# An unclosed regular expression is expected to report a missing closing
# delimiter.
def test_unterminated_regular_expression
  source = "/hello"
  assert_errors expression(source), source, ["Expected a closing delimiter for a regular expression."]
end
# An unclosed backtick string is expected to report a missing closing
# delimiter.
def test_unterminated_xstring
  source = "`hello"
  assert_errors expression(source), source, ["Expected a closing delimiter for an xstring."]
end
# An unclosed double-quoted string is expected to report a missing closing
# delimiter.
def test_unterminated_string
  source = '"hello'
  assert_errors expression(source), source, ["Expected a closing delimiter for an interpolated string."]
end
# An unclosed `%s[` symbol is expected to report a missing closing delimiter.
def test_unterminated_s_symbol
  source = "%s[abc"
  assert_errors expression(source), source, ["Expected a closing delimiter for a dynamic symbol."]
end
# An unclosed parenthesized expression is expected to report these two errors.
def test_unterminated_parenthesized_expression
  source = '(1 + 2'
  messages = ["Expected to be able to parse an expression.", "Expected a closing parenthesis."]
  assert_errors expression(source), source, messages
end
# `(1, 2, 3)` is expected to report this exact sequence of errors.
def test_1_2_3
  source = "(1, 2, 3)"
  messages = [
    "Expected to be able to parse an expression.",
    "Expected a closing parenthesis.",
    "Expected a newline or semicolon after statement.",
    "Expected to be able to parse an expression.",
    "Expected a newline or semicolon after statement.",
    "Expected to be able to parse an expression.",
    "Expected a newline or semicolon after statement.",
    "Expected to be able to parse an expression."
  ]
  assert_errors expression(source), source, messages
end
# `return(1, 2, 3)` is expected to report these error messages.
def test_return_1_2_3
  messages = [
    "Expected to be able to parse an expression.",
    "Expected a closing parenthesis.",
    "Expected a newline or semicolon after statement.",
    "Expected to be able to parse an expression."
  ]
  assert_error_messages("return(1, 2, 3)", messages)
end
# A trailing comma after `return 1` is expected to report a missing argument.
def test_return_1
  source = "return 1,;"
  assert_errors expression(source), source, ["Expected to be able to parse an argument."]
end
# `next(1, 2, 3)` is expected to report these errors.
def test_next_1_2_3
  source = "next(1, 2, 3)"
  messages = [
    "Expected to be able to parse an expression.",
    "Expected a closing parenthesis.",
    "Expected a newline or semicolon after statement.",
    "Expected to be able to parse an expression."
  ]
  assert_errors expression(source), source, messages
end
# A trailing comma after `next 1` is expected to report a missing argument.
def test_next_1
  source = "next 1,;"
  assert_errors expression(source), source, ["Expected to be able to parse an argument."]
end
# `break` followed by a parenthesized comma list is invalid and produces the
# four recovery errors listed below.
def test_break_1_2_3
  source = "break(1, 2, 3)"

  assert_errors expression(source), source, [
    "Expected to be able to parse an expression.",
    "Expected a closing parenthesis.",
    "Expected a newline or semicolon after statement.",
    "Expected to be able to parse an expression."
  ]
end
# A trailing comma after the argument of `break` must report a missing
# argument.
def test_break_1
  source = "break 1,;"
  assert_errors expression(source), source, ["Expected to be able to parse an argument."]
end
# Passing `...` inside a method whose own parameter list does not declare
# `...` is an error.
def test_argument_forwarding_when_parent_is_not_forwarding
  source = 'def a(x, y, z); b(...); end'
  assert_errors expression(source), source, ["unexpected ... when parent method is not forwarding."]
end
# A forwarding method followed by a non-forwarding one: exactly one error is
# expected for the whole source, so the first definition's `...` must not
# carry over to the second.
def test_argument_forwarding_only_effects_its_own_internals
  source = 'def a(...); b(...); end; def c(x, y, z); b(...); end'
  assert_errors expression(source), source, ["unexpected ... when parent method is not forwarding."]
end
# `::` at the top level must be followed by a constant, not a lower-case
# identifier. Only the error messages are checked.
def test_top_level_constant_with_downcased_identifier
  messages = [
    "Expected a constant after ::.",
    "Expected a newline or semicolon after statement."
  ]

  assert_error_messages "::foo", messages
end
# A top-level constant path whose first segment is a lower-case identifier is
# rejected with the same two errors as the bare `::foo` form.
def test_top_level_constant_starting_with_downcased_identifier
  messages = [
    "Expected a constant after ::.",
    "Expected a newline or semicolon after statement."
  ]

  assert_error_messages "::foo::A", messages
end
# When the new name in `alias` is a global variable, the old name must be a
# global variable too.
def test_aliasing_global_variable_with_non_global_variable
  source = "alias $a b"
  assert_errors expression(source), source, ["Expected a global variable."]
end
# When the new name in `alias` is a bare word, the old name may not be a
# global variable.
def test_aliasing_non_global_variable_with_global_variable
  source = "alias a $b"
  assert_errors expression(source), source, ["Expected a bare word or symbol argument."]
end
# Numbered globals such as `$1` cannot be the target of a global alias.
def test_aliasing_global_variable_with_global_number_variable
  source = "alias $a $1"
  assert_errors expression(source), source, ["Can't make alias for number variables."]
end
# A parenthesized receiver in `def` must be followed by `.` or `::` and a
# method name.
def test_def_with_expression_receiver_and_no_identifier
  source = "def (a); end"
  assert_errors expression(source), source, ["Expected '.' or '::' after receiver"]
end
# A parenthesized `def` receiver spanning several statements is rejected
# with the four errors listed below.
def test_def_with_multiple_statements_receiver
  source = "def (\na\nb\n).c; end"
  messages = [
    "Expected closing ')' for receiver.",
    "Expected '.' or '::' after receiver",
    "Expected to be able to parse an expression.",
    "Expected to be able to parse an expression."
  ]

  assert_errors expression(source), source, messages
end
# Empty parentheses are not a valid `def` receiver.
def test_def_with_empty_expression_receiver
  source = "def ().a; end"
  assert_errors expression(source), source, ["Expected to be able to parse receiver."]
end
# A block opened with `{` cannot be closed with `end`. Only the error
# messages are checked.
def test_block_beginning_with_brace_and_ending_with_end
  messages = [
    "Expected a newline or semicolon after statement.",
    "Expected to be able to parse an expression.",
    "Expected to be able to parse an expression.",
    "Expected block beginning with '{' to end with '}'."
  ]

  assert_error_messages "x.each { x end", messages
end
# A `*splat` argument after a `**double_splat` is an error, but the call is
# still expected to parse into a CallNode holding both arguments.
def test_double_splat_followed_by_splat_argument
  kwargs_call = CallNode(nil, nil, Location(), nil, nil, nil, nil, 0, "kwargs")
  args_call = CallNode(nil, nil, Location(), nil, nil, nil, nil, 0, "args")

  arguments = ArgumentsNode([
    KeywordHashNode([AssocSplatNode(kwargs_call, Location())]),
    SplatNode(Location(), args_call)
  ])

  expected = CallNode(nil, nil, Location(), Location(), arguments, Location(), nil, 0, "a")

  assert_errors expected, "a(**kwargs, *args)", ["Unexpected splat argument after double splat."]
end
# A positional argument after a `&block` argument is an error, but both
# arguments are still expected in the resulting CallNode.
def test_arguments_after_block
  block_argument = BlockArgumentNode(expression("block"), Location())
  arguments = ArgumentsNode([block_argument, expression("foo")])
  expected = CallNode(nil, nil, Location(), Location(), arguments, Location(), nil, 0, "a")

  assert_errors expected, "a(&block, foo)", ["Unexpected argument after block argument."]
end
# The `and` keyword is not accepted inside a splat argument: the argument
# list is expected to end before it. Only the error messages are checked.
def test_arguments_binding_power_for_and
  messages = [
    "Expected a ')' to close the argument list.",
    "Expected a newline or semicolon after statement.",
    "Expected to be able to parse an expression."
  ]

  assert_error_messages "foo(*bar and baz)", messages
end
# A `*splat` argument after a `key: value` argument is reported with the same
# message as a splat after a double splat.
def test_splat_argument_after_keyword_argument
  keyword = AssocNode(
    SymbolNode(nil, Location(), Location(), "foo"),
    CallNode(nil, nil, Location(), nil, nil, nil, nil, 0, "bar"),
    nil
  )
  splat = SplatNode(Location(), CallNode(nil, nil, Location(), nil, nil, nil, nil, 0, "args"))

  arguments = ArgumentsNode([KeywordHashNode([keyword]), splat])
  expected = CallNode(nil, nil, Location(), Location(), arguments, Location(), nil, 0, "a")

  assert_errors expected, "a(foo: bar, *args)", ["Unexpected splat argument after double splat."]
end
# Defining a module directly inside a method body is an error; the module is
# still expected as the only statement of the DefNode.
def test_module_definition_in_method_body
  module_node = ModuleNode([], Location(), ConstantReadNode(), nil, Location())
  body = StatementsNode([module_node])
  expected = DefNode(Location(), nil, nil, body, [], Location(), nil, nil, nil, nil, Location())

  assert_errors expected, "def foo;module A;end;end", ["Module definition in method body"]
end
# Defining a module inside a block that is itself inside a method body is
# reported with the same error as a module defined directly in the method.
# The expected tree is a DefNode whose only statement is the `bar` call, and
# that call's block holds the ModuleNode.
def test_module_definition_in_method_body_within_block
expected = DefNode(
Location(),
nil,
nil,
StatementsNode(
[CallNode(
nil,
nil,
Location(),
nil,
nil,
nil,
BlockNode(
[],
nil,
StatementsNode([ModuleNode([], Location(), ConstantReadNode(), nil, Location())]),
Location(),
Location()
),
0,
"bar"
)]
),
[],
Location(),
nil,
nil,
nil,
nil,
Location()
)
# The source is a multi-line string literal; its contents are passed to the
# parser verbatim, leading and trailing newline included.
assert_errors expected, "
def foo
bar do
module Foo;end
end
end
", ["Module definition in method body"]
end
# Defining a class directly inside a method body is an error; the class is
# still expected as the only statement of the DefNode.
def test_class_definition_in_method_body
  class_node = ClassNode([], Location(), ConstantReadNode(), nil, nil, nil, Location())
  body = StatementsNode([class_node])
  expected = DefNode(Location(), nil, nil, body, [], Location(), nil, nil, nil, nil, Location())

  assert_errors expected, "def foo;class A;end;end", ["Class definition in method body"]
end
# Constants, instance variables, global variables and class variables are not
# valid formal parameters. Each one produces its own error and the expected
# ParametersNode is empty.
def test_bad_arguments
  parameters = ParametersNode([], [], [], nil, [], nil, nil)
  expected = DefNode(Location(), nil, parameters, nil, [], Location(), nil, Location(), Location(), nil, Location())

  messages = [
    "Formal argument cannot be a constant",
    "Formal argument cannot be an instance variable",
    "Formal argument cannot be a global variable",
    "Formal argument cannot be a class variable"
  ]

  assert_errors expected, "def foo(A, @a, $A, @@a);end", messages
end
# Assigning to numbered-parameter names is an error. The source assigns to
# `_1` through `_10`; all ten writes are expected in the tree, but only nine
# errors are expected.
def test_cannot_assign_to_a_reserved_numbered_parameter
  writes = (1..10).map do |index|
    LocalVariableWriteNode(:"_#{index}", 0, SymbolNode(Location(), Location(), nil, "a"), Location(), Location())
  end
  expected = BeginNode(Location(), StatementsNode(writes), nil, nil, nil, Location())

  source = <<~RUBY
    begin
    _1=:a;_2=:a;_3=:a;_4=:a;_5=:a
    _6=:a;_7=:a;_8=:a;_9=:a;_10=:a
    end
  RUBY

  assert_errors expected, source, Array.new(9, "reserved for numbered parameter")
end
# A trailing comma in a method's parameter list is an error; the three
# parameters before it are still expected in the tree.
def test_do_not_allow_trailing_commas_in_method_parameters
  names = [:a, :b, :c]
  required = names.map { |name| RequiredParameterNode(name) }
  parameters = ParametersNode(required, [], [], nil, [], nil, nil)

  expected = DefNode(Location(), nil, parameters, nil, names, Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(a,b,c,);end", ["Unexpected ','."]
end
# A trailing comma in a lambda's parameter list is an error; the two
# parameters before it are still expected in the tree.
def test_do_not_allow_trailing_commas_in_lambda_parameters
  names = [:a, :b]
  required = names.map { |name| RequiredParameterNode(name) }
  parameters = ParametersNode(required, [], [], nil, [], nil, nil)
  block_parameters = BlockParametersNode(parameters, [], Location(), Location())

  expected = LambdaNode(names, Location(), block_parameters, nil)

  assert_errors expected, "-> (a, b, ) {}", ["Unexpected ','."]
end
# A `?` character literal may hold only one code point; a `\u{...}` escape
# listing two of them is an error.
def test_do_not_allow_multiple_codepoints_in_a_single_character_literal
  source = '?\u{0001 0002}'
  expected = StringNode(Location(), Location(), nil, "\u0001\u0002")

  assert_errors expected, source, ["Multiple codepoints at single character literal"]
end
# A `\u{...}` escape with seven hexadecimal digits is invalid. The same
# message is expected twice.
def test_do_not_allow_more_than_6_hexadecimal_digits_in_u_Unicode_character_notation
  source = '"\u{0000001}"'
  expected = StringNode(Location(), Location(), Location(), "\u0001")

  assert_errors expected, source, ["invalid Unicode escape.", "invalid Unicode escape."]
end
# A non-hexadecimal character inside `\u{...}` is reported as an
# unterminated escape. The same message is expected twice.
def test_do_not_allow_characters_other_than_0_9_a_f_and_A_F_in_u_Unicode_character_notation
  source = '"\u{000z}"'
  expected = StringNode(Location(), Location(), Location(), "\u0000z}")

  assert_errors expected, source, ["unterminated Unicode escape", "unterminated Unicode escape"]
end
# A required parameter may not follow a `&block` parameter. The expected tree
# lists it in the third ParametersNode slot rather than the first.
def test_method_parameters_after_block
  block_parameter = BlockParameterNode(Location(), Location())
  parameters = ParametersNode([], [], [RequiredParameterNode(:a)], nil, [], nil, block_parameter)

  expected = DefNode(Location(), nil, parameters, nil, [:block, :a], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(&block, a)\nend", ["Unexpected parameter order"]
end
# A required parameter may not follow an anonymous `&` block parameter
# either.
def test_method_with_arguments_after_anonymous_block
  block_parameter = BlockParameterNode(nil, Location())
  parameters = ParametersNode([], [], [RequiredParameterNode(:a)], nil, [], nil, block_parameter)

  expected = DefNode(Location(), nil, parameters, nil, [:&, :a], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(&, a)\nend", ["Unexpected parameter order"]
end
# A required parameter may not follow the `...` forwarding parameter.
def test_method_parameters_after_arguments_forwarding
  parameters = ParametersNode([], [], [RequiredParameterNode(:a)], nil, [], ForwardingParameterNode(), nil)

  expected = DefNode(Location(), nil, parameters, nil, [:"...", :a], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(..., a)\nend", ["Unexpected parameter order"]
end
# A required parameter may not follow a keyword parameter.
def test_keywords_parameters_before_required_parameters
  keywords = [KeywordParameterNode(Location(), nil)]
  parameters = ParametersNode([], [], [RequiredParameterNode(:a)], nil, keywords, nil, nil)

  expected = DefNode(Location(), nil, parameters, nil, [:b, :a], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(b:, a)\nend", ["Unexpected parameter order"]
end
# A keyword parameter may not follow a `**rest` parameter.
def test_rest_keywords_parameters_before_required_parameters
  keywords = [KeywordParameterNode(Location(), nil)]
  keyword_rest = KeywordRestParameterNode(Location(), Location())
  parameters = ParametersNode([], [], [], nil, keywords, keyword_rest, nil)

  expected = DefNode(Location(), nil, parameters, nil, [:rest, :b], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(**rest, b:)\nend", ["Unexpected parameter order"]
end
# `...` may appear only once in a parameter list. A single forwarding
# parameter and a single `...` local are expected in the tree.
def test_double_arguments_forwarding
  parameters = ParametersNode([], [], [], nil, [], ForwardingParameterNode(), nil)

  expected = DefNode(Location(), nil, parameters, nil, [:"..."], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(..., ...)\nend", ["Unexpected parameter order"]
end
# Two ordering violations in one parameter list (a required parameter and a
# keyword parameter, both after `**args`) produce two separate errors.
def test_multiple_error_in_parameters_order
  keywords = [KeywordParameterNode(Location(), nil)]
  keyword_rest = KeywordRestParameterNode(Location(), Location())
  parameters = ParametersNode([], [], [RequiredParameterNode(:a)], nil, keywords, keyword_rest, nil)

  expected = DefNode(Location(), nil, parameters, nil, [:args, :a, :b], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(**args, a, b:)\nend", ["Unexpected parameter order", "Unexpected parameter order"]
end
# Parses `def foo(**args, a, b:)` and expects two parameter-order errors.
#
# NOTE(review): the source and expected tree are the same as in
# test_multiple_error_in_parameters_order, and the source contains no
# optional parameter, so this looks like a copy-paste that never exercises
# what the test name describes. Confirm the intended input.
def test_switching_to_optional_arguments_twice
  keywords = [KeywordParameterNode(Location(), nil)]
  keyword_rest = KeywordRestParameterNode(Location(), Location())
  parameters = ParametersNode([], [], [RequiredParameterNode(:a)], nil, keywords, keyword_rest, nil)

  expected = DefNode(Location(), nil, parameters, nil, [:args, :a, :b], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(**args, a, b:)\nend", ["Unexpected parameter order", "Unexpected parameter order"]
end
# Parses `def foo(**args, a, b:)` and expects two parameter-order errors.
#
# NOTE(review): the source and expected tree are the same as in
# test_multiple_error_in_parameters_order, so this test adds no coverage of
# its own as written. Confirm whether a different input was intended.
def test_switching_to_named_arguments_twice
  keywords = [KeywordParameterNode(Location(), nil)]
  keyword_rest = KeywordRestParameterNode(Location(), Location())
  parameters = ParametersNode([], [], [RequiredParameterNode(:a)], nil, keywords, keyword_rest, nil)

  expected = DefNode(Location(), nil, parameters, nil, [:args, :a, :b], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(**args, a, b:)\nend", ["Unexpected parameter order", "Unexpected parameter order"]
end
# Optional parameters interleaved with required ones (`a, b = 1, c, d = 2,
# e`) yield a single ordering error. The expected tree groups `a` first,
# `b`/`d` as optionals, and `c`/`e` in the third ParametersNode slot.
def test_returning_to_optional_parameters_multiple_times
  leading = [RequiredParameterNode(:a)]
  optionals = [:b, :d].map { |name| OptionalParameterNode(name, Location(), Location(), IntegerNode()) }
  trailing = [RequiredParameterNode(:c), RequiredParameterNode(:e)]
  parameters = ParametersNode(leading, optionals, trailing, nil, [], nil, nil)

  expected = DefNode(Location(), nil, parameters, nil, [:a, :b, :c, :d, :e], Location(), nil, Location(), Location(), nil, Location())

  assert_errors expected, "def foo(a, b = 1, c, d = 2, e)\nend", ["Unexpected parameter order"]
end
# A `case` with an `else` branch but no `when` clauses is an error; the
# CaseNode is still expected, with an empty conditions list.
def test_case_without_when_clauses_errors_on_else_clause
  predicate = SymbolNode(Location(), Location(), nil, "a")
  else_clause = ElseNode(Location(), nil, Location())
  expected = CaseNode(predicate, [], else_clause, Location(), Location())

  assert_errors expected, "case :a\nelse\nend", ["Unexpected else without no when clauses in case statement."]
end
# A setter (`a=`) cannot be written as an endless method definition.
def test_setter_method_cannot_be_defined_in_an_endless_method_definition
  body = StatementsNode([IntegerNode()])
  expected = DefNode(Location(), nil, nil, body, [], Location(), nil, Location(), Location(), Location(), nil)

  assert_errors expected, "def a=() = 42", ["Setter method cannot be defined in an endless method definition"]
end
# `...` is not a valid parameter for a lambda literal.
def test_do_not_allow_forward_arguments_in_lambda_literals
  parameters = ParametersNode([], [], [], nil, [], ForwardingParameterNode(), nil)
  block_parameters = BlockParametersNode(parameters, [], Location(), Location())
  expected = LambdaNode([:"..."], Location(), block_parameters, nil)

  assert_errors expected, "->(...) {}", ["Unexpected ..."]
end
# `...` is not a valid parameter for a block.
def test_do_not_allow_forward_arguments_in_blocks
  parameters = ParametersNode([], [], [], nil, [], ForwardingParameterNode(), nil)
  block_parameters = BlockParametersNode(parameters, [], Location(), Location())
  block = BlockNode([:"..."], block_parameters, nil, Location(), Location())

  expected = CallNode(nil, nil, Location(), nil, nil, nil, block, 0, "a")

  assert_errors expected, "a {|...|}", ["Unexpected ..."]
end
# `return` directly inside a class body is an error.
def test_dont_allow_return_inside_class_body
  body = StatementsNode([ReturnNode(Location(), nil)])
  expected = ClassNode([], Location(), ConstantReadNode(), nil, nil, body, Location())

  assert_errors expected, "class A; return; end", ["Invalid return in class/module body"]
end
# `return` directly inside a module body is an error.
def test_dont_allow_return_inside_module_body
  body = StatementsNode([ReturnNode(Location(), nil)])
  expected = ModuleNode([], Location(), ConstantReadNode(), body, Location())

  assert_errors expected, "module A; return; end", ["Invalid return in class/module body"]
end
# Assigning to `$+` or to a numbered global such as `$1466` is an error; one
# error is expected per assignment.
def test_dont_allow_setting_to_back_and_nth_reference
  writes = Array.new(2) { GlobalVariableWriteNode(Location(), Location(), NilNode()) }
  expected = BeginNode(Location(), StatementsNode(writes), nil, nil, nil, Location())

  assert_errors expected, "begin\n$+ = nil\n$1466 = nil\nend", ["Can't set variable", "Can't set variable"]
end
# Reusing the name `a` for a later parameter is an error, whether the
# duplicate is a required, rest (`*a`), keyword-rest (`**a`) or block (`&a`)
# parameter. Each source is checked in turn against the same DefNode shape;
# only the ParametersNode differs.
def test_duplicated_parameter_names
  a_and_b = -> { [RequiredParameterNode(:a), RequiredParameterNode(:b)] }

  cases = [
    [
      "def foo(a,b,a);end",
      ParametersNode([*a_and_b.call, RequiredParameterNode(:a)], [], [], nil, [], nil, nil)
    ],
    [
      "def foo(a,b,*a);end",
      ParametersNode(a_and_b.call, [], [], RestParameterNode(Location(), Location()), [], nil, nil)
    ],
    [
      "def foo(a,b,**a);end",
      ParametersNode(a_and_b.call, [], [], nil, [], KeywordRestParameterNode(Location(), Location()), nil)
    ],
    [
      "def foo(a,b,&a);end",
      ParametersNode(a_and_b.call, [], [], nil, [], nil, BlockParameterNode(Location(), Location()))
    ]
  ]

  cases.each do |source, parameters|
    expected = DefNode(Location(), nil, parameters, nil, [:a, :b], Location(), nil, Location(), Location(), nil, Location())
    assert_errors expected, source, ["Duplicated parameter name."]
  end
end
private
# Checks an invalid snippet end to end:
#
# 1. Ripper must return nil for the source.
# 2. The last top-level statement YARP produces must equal +expected+,
#    ignoring locations.
# 3. YARP's error messages must equal +errors+, in order.
#
# The pattern match raises if the parse result does not have the
# program/statements shape or has no statements.
def assert_errors(expected, source, errors)
  assert_nil Ripper.sexp_raw(source)

  parsed = YARP.parse(source)
  parsed => YARP::ParseResult[value: YARP::ProgramNode[statements: YARP::StatementsNode[body: [*, last_statement]]]]

  assert_equal_nodes(expected, last_statement, compare_location: false)

  actual_messages = parsed.errors.map { |error| error.message }
  assert_equal(errors, actual_messages)
end
# Checks only the diagnostics for an invalid snippet: Ripper must return nil
# for the source, and YARP's error messages must equal +errors+, in order.
# The resulting syntax tree is not inspected.
def assert_error_messages(source, errors)
  assert_nil Ripper.sexp_raw(source)

  actual_messages = YARP.parse(source).errors.map { |error| error.message }
  assert_equal(errors, actual_messages)
end
# Parses +source+ with YARP and returns the last top-level statement node.
# Raises through the pattern match if the result does not have the
# program/statements shape or has no statements.
def expression(source)
  parsed = YARP.parse(source)
  parsed => YARP::ParseResult[value: YARP::ProgramNode[statements: YARP::StatementsNode[body: [*, last_statement]]]]
  last_statement
end
end

View File

@ -0,0 +1,23 @@
alias :foo :bar
alias %s[abc] %s[def]
alias :'abc' :'def'
alias :"abc#{1}" :'def'
alias $a $'
alias foo bar
alias $foo $bar
alias foo if
alias foo <=>
alias :== :eql?
alias A B
alias :A :B

View File

@ -0,0 +1,8 @@
foo !bar
-foo*bar
+foo**bar
foo ~bar

View File

@ -0,0 +1,80 @@
[*a]
foo[bar, baz] = 1, 2, 3
[a: [:b, :c]]
[:a, :b,
:c,1,
:d,
]
[:a, :b,
:c,1,
:d
]
[foo => bar]
foo[bar][baz] = qux
foo[bar][baz]
[
]
foo[bar, baz]
foo[bar, baz] = qux
foo[0], bar[0] = 1, 2
foo[bar[baz] = qux]
foo[bar]
foo[bar] = baz
[**{}]
[**kw]
[1, **kw]
[1, **kw, **{}, **kw]
[
foo => bar,
]
%i#one two three#
%w#one two three#
%x#one two three#
%i@one two three@
%w@one two three@
%x@one two three@
%i{one two three}
%w{one two three}
%x{one two three}

View File

@ -0,0 +1,14 @@
begin
a
ensure
b
end
begin; a; ensure; b; end
begin a
ensure b
end
begin a; ensure b; end

View File

@ -0,0 +1,79 @@
begin; a; rescue; b; else; c; end
begin; a; rescue; b; else; c; ensure; d; end
begin
a
end
begin; a; end
begin a
end
begin a; end
begin
a
rescue
b
rescue
c
rescue
d
end
begin
a
rescue Exception => ex
b
rescue AnotherException, OneMoreException => ex
c
end
begin
a
rescue Exception => ex
b
ensure
b
end
%!abc!
begin
a
rescue
b
end
begin;a;rescue;b;end
begin
a;rescue
b;end
begin
a
rescue Exception
b
end
begin
a
rescue Exception, CustomException
b
end
begin
a
rescue Exception, CustomException => ex
b
end
begin
a
rescue Exception => ex
b
end

View File

@ -0,0 +1,54 @@
foo[bar] { baz }
foo[bar] do
baz
end
x.reduce(0) { |x, memo| memo += x }
foo do end
foo bar, (baz do end)
foo bar do end
foo bar baz do end
foo do |a = b[1]|
end
foo do
rescue
end
foo do
bar do
baz do
end
end
end
foo[bar] { baz }
foo { |x, y = 2, z:| x }
foo { |x| }
fork = 1
fork do |a|
end
fork { |a| }
C do
end
C {}
foo lambda { |
a: 1,
b: 2
|
}
foo do |bar,| end

View File

@ -0,0 +1,5 @@
a &&= b
a += b
a ||= b

View File

@ -0,0 +1,3 @@
false
true

View File

@ -0,0 +1,25 @@
break
break (1), (2), (3)
break 1
break 1, 2,
3
break 1, 2, 3
break [1, 2, 3]
break(
1
2
)
break()
break(1)
foo { break 42 } == 42
foo { |a| break } == 42

View File

@ -0,0 +1,30 @@
case :hi
when :hi
end
case true; when true; puts :hi; when false; puts :bye; end
case; when *foo; end
case :hi
when :hi
else
:b
end
case this; when FooBar, BazBonk; end
case
when foo == bar
end
case
when a
else
# empty
end
case type;
;when :b;
; else;
end

View File

@ -0,0 +1,35 @@
class A a = 1 end
class A; ensure; end
class A; rescue; else; ensure; end
class A < B
a = 1
end
class << not foo
end
class A; class << self; ensure; end; end
class A; class << self; rescue; else; ensure; end; end
class << foo.bar
end
class << foo.bar;end
class << self
end
class << self;end
class << self
1 + 2
end
class << self;1 + 2;end
class A < B[1]
end

View File

@ -0,0 +1,24 @@
a
# Comment
b
c # Comment
d
e
# Comment
.f
g
# Comment
.h
i # Comment
.j
k # Comment
.l
m
# Comment
&.n

View File

@ -0,0 +1,169 @@
A::B
A::B::C
a::B
A::B = 1
A = 1
ABC
Foo 1
::A::foo
::A = 1
::A::B = 1
::A::B
::A
A::false
A::B::true
A::&
A::`
A::!
A::!=
A::^
A::==
A::===
A::=~
A::>
A::>=
A::>>
A::<<
A::\
A::alias
A::and
A::begin
A::BEGIN
A::break
A::class
A::def
A::defined
A::do
A::else
A::elsif
A::end
A::END
A::ensure
A::false
A::for
A::if
A::in
A::next
A::nil
A::not
A::or
A::redo
A::rescue
A::retry
A::return
A::self
A::super
A::then
A::true
A::undef
A::unless
A::until
A::when
A::while
A::yield
A::__ENCODING__
A::__FILE__
A::__LINE__
A::<
A::<=>
A::<<
A::-
A::%
A::%i
A::%w
A::%x
A::%I
A::%W
A::|
A::+
A::/
A::*
A::**
A::~
A::_::
A::_..
A::__END__

View File

@ -0,0 +1,44 @@
<<-EOF
a
EOF
<<-FIRST + <<-SECOND
a
FIRST
b
SECOND
<<-`EOF`
a
#{b}
EOF
<<-EOF #comment
a
EOF
<<-EOF
a
b
EOF
<<-"EOF"
a
#{b}
EOF
<<-EOF
a
#{b}
EOF
%#abc#
<<-EOF
a
b
EOF
<<-'EOF'
a #{1}
EOF

View File

@ -0,0 +1,7 @@
defined? 1 and defined? 2
defined?(x %= 2)
defined?(foo and bar)
defined? 1

View File

@ -0,0 +1,20 @@
puts "hi"\
"there"
%I{a\
b}
<<-E
1 \
2
3
E
x = %
a = foo(<<~EOF.chop)
baz
EOF

View File

@ -0,0 +1,2 @@
=begin
=end

View File

@ -0,0 +1,19 @@
for i in 1..10
i
end
for i in 1..10; i; end
for i,j in 1..10
i
end
for i,j,k in 1..10
i
end
for i in 1..10 do
i
end
for i in 1..10; i; end

View File

@ -0,0 +1,89 @@
$global_variable
$_
$-w
$LOAD_PATH
$stdin
$stdout
$stderr
$!
$?
$~
$&
$`
$'
$+
$:
$;
$,
$DEBUG
$FILENAME
$-0
$LOADED_FEATURES
$VERBOSE
$-K
:$global_variable
:$_
:$-w
:$LOAD_PATH
:$stdin
:$stdout
:$stderr
:$!
:$?
:$~
:$&
:$`
:$'
:$+
:$:
:$;
:$DEBUG
:$FILENAME
:$-0
:$LOADED_FEATURES
:$VERBOSE
:$-K

View File

@ -0,0 +1,20 @@
{}
{
}
{ a => b, c => d }
{ a => b, **c }
{
a: b,
c: d
}
{ a: b, c: d, **e, f: g }
{ "a": !b? }

View File

@ -0,0 +1,2 @@
<<-END
END

View File

@ -0,0 +1,9 @@
<<~RUBY
pre
#{
<<RUBY
hello
RUBY
}
post
RUBY

View File

@ -0,0 +1,14 @@
<<-HERE\
HERE
<<~THERE\
way over
<<HERE
not here
HERE
<<~BUT\
but
BUT
there
THERE

View File

@ -0,0 +1,4 @@
<<-EOE
some
heredocs
EOE

31
test/yarp/fixtures/if.txt Normal file
View File

@ -0,0 +1,31 @@
if true; 1; end
if true
1 else 2 end
if true then true elsif false then false elsif nil then nil else self end
1 if true
break if true
next if true
return if true
if exit_loop then break 42 end
if foo
then bar
end
a if b if c
if true
a b:
else
end
if type in 1
elsif type in B
end

View File

@ -0,0 +1,63 @@
!1
~1
1 != 2
1 !~ 2
1 % 2
1 & 2
1 * 2
1**2
1 + 2
1 - 2
1 / 2
1/2/3
1 < 2
1 << 2
1 <= 2
1 <=> 2
1 == 2
1 === 2
1 =~ 2
1 > 2
1 >= 2
1 >> 2
1 ^ 2
1 | 2
1 && 2
1 and 2
1 * 2 ** 3
1 * 2 + 3
1 or 2
1 || 2
1 + 2 * 3
(1 + 1)

View File

@ -0,0 +1,29 @@
def def
end
def self.ensure
end
private def foo
bar do
end
end
def m(a, **nil)
end
def __ENCODING__.a
end
%{abc}
%"abc"
def __FILE__.a
end
def __LINE__.a
end
def nil::a
end

View File

@ -0,0 +1,11 @@
redo
retry
self
__ENCODING__
__FILE__
__LINE__

View File

@ -0,0 +1,7 @@
->(
foo
) {}
->(x: "b#{a}") { }
->(a: b * 3) {}

View File

@ -0,0 +1,139 @@
foo.bar %{baz}
a.b(c, d)
a.b()
foo
.bar
&.baz
a!
a.()
a.(1, 2, 3)
a::b
foo.bar = 1
a?
a(&block)
a(**kwargs)
a.b.c
a(b, c)
a()
a(*args)
a b, c
a.b c, d
foo.foo, bar.bar = 1, 2
a&.b
a&.()
a&.b(c)
a&.b()
foo :a, :b if bar? or baz and qux
foo(:a,
:b
)
foo(*rest)
foo(:a, :h => [:x, :y], :a => :b, &:bar)
hi 123, { :there => :friend, **{}, whatup: :dog }
foo :a, b: true do |a, b| puts a end
hi there: :friend
hi :there => :friend, **{}, whatup: :dog
hi(:there => :friend, **{}, whatup: :dog)
foo({ a: true, b: false, }, &:block)
hi :there => :friend
foo(:a,
:b,
)
foo(
:a,
b: :c,
)
foo a: true, b: false, &:block
some_func 1, kwarg: 2
Kernel.Integer(10)
x.each { }
foo.map { $& }
A::B::C :foo
A::B::C(:foo)
A::B::C(:foo) { }
foo("a": -1)
foo bar: { baz: qux do end }
foo bar: { **kw do end }
foo "#{bar.map do "baz" end}" do end
foo class Bar baz do end end
foo module Bar baz do end end
foo [baz do end]
p begin 1.times do 1 end end
foo :a,
if x
bar do |a|
a
end
end
foo :a,
while x
bar do |a|
a
end
end,
until x
baz do
end
end
{} + A {}
{} + A { |a| a }
A {} + A {}
lst << A {}

View File

@ -0,0 +1,165 @@
def foo((bar, baz))
end
def foo((bar, baz), optional = 1, (bin, bag))
end
def a; ensure; end
def (b).a
end
def (a)::b
end
def false.a
end
def a(...)
end
def $var.a
end
def a.b
end
def @var.a
end
def a b:; end
%,abc,
def a(b:)
end
def a(**b)
end
def a(**)
end
a = 1; def a
end
def a b, c, d
end
def nil.a
end
def a b:, c: 1
end
def a(b:, c: 1)
end
def a(b:
1, c:)
end
%.abc.
def a b = 1, c = 2
end
def a()
end
def a b, c = 2
end
def a b
end
def a; rescue; else; ensure; end
def a *b
end
def a(*)
end
def a
b = 1
end
def self.a
end
def true.a
end
def a
end
def hi
return :hi if true
:bye
end
def foo = 1
def bar = 2
def foo(bar) = 123
def foo = 123
def a(*); b(*); end
def a(...); b(...); end
def a(...); b(1, 2, ...); end
def (c = b).a
end
def a &b
end
def a(&)
end
def @@var.a
end
def (a = b).C
end
def self.Array_function; end
Const = 1; def Const.a
end
def a(...); "foo#{b(...)}"; end
def foo
{}.merge **bar, **baz, **qux
end
def bar(a: (1...10))
end
def bar(a: (...10))
end
def bar(a: (1...))
end
def bar(a = (1...10))
end
def bar(a = (...10))
end
def bar(a = (1...))
end
def method(a)
item >> a {}
end
def foo(_a, _a, b, c)
end

View File

@ -0,0 +1,18 @@
module A a = 1 end
%Q{aaa #{bbb} ccc}
module m::M
end
module A
x = 1; rescue; end
module ::A
end
module A[]::B
end
module A[1]::B
end

View File

@ -0,0 +1,24 @@
next
next (1), (2), (3)
next 1
next 1, 2,
3
next 1, 2, 3
next [1, 2, 3]
next(
1
2
)
next
1
next()
next(1)

View File

@ -0,0 +1,13 @@
nil
()
(
;
;
)
END { 1 }
BEGIN { 1 }

View File

@ -0,0 +1,105 @@
def !
end
def !=
end
def !~
end
def %
end
def self.+
end
def &
end
def *
end
def **
end
%|abc|
def + **b
end
def +()
end
def + b
end
def self.+
end
def +
end
def +@
end
def -
end
def a.-;end
def -@
end
def /
end
def <
end
def <<
end
def <=
end
def <=>
end
def ==
end
def ===
end
def =~
end
def >
end
def >=
end
def >>
end
def []
end
def []=
end
def ^
end
def `
end
def self.`
end
def |
end
def ~
end

View File

@ -0,0 +1,20 @@
not foo and not bar
not(foo and bar)
not foo
not foo and not
bar
not foo and
not
bar
not foo and
not
bar

View File

@ -0,0 +1,63 @@
0
1
1.0
2
0b0
0b1
0b10
0d0
0d1
0d2
00
01
02
0o0
0o1
0o2
0x0
0x1
0x2
1i
1r
-1
1ri
1.2ri
-1ri
-1.2ri
0o1r
0o1i
0o1ri
0d1r
0d1i
0b1ri

View File

@ -0,0 +1,191 @@
foo => bar
foo => 1
foo => 1.0
foo => 1i
foo => 1r
foo => :foo
foo => %s[foo]
foo => :"foo"
foo => /foo/
foo => `foo`
foo => %x[foo]
foo => %i[foo]
foo => %I[foo]
foo => %w[foo]
foo => %W[foo]
foo => %q[foo]
foo => %Q[foo]
foo => "foo"
foo => nil
foo => self
foo => true
foo => false
foo => __FILE__
foo => __LINE__
foo => __ENCODING__
foo => -> { bar }
foo => 1 .. 1
foo => 1.0 .. 1.0
foo => 1i .. 1i
foo => 1r .. 1r
foo => :foo .. :foo
foo => %s[foo] .. %s[foo]
foo => :"foo" .. :"foo"
foo => /foo/ .. /foo/
foo => `foo` .. `foo`
foo => %x[foo] .. %x[foo]
foo => %i[foo] .. %i[foo]
foo => %I[foo] .. %I[foo]
foo => %w[foo] .. %w[foo]
foo => %W[foo] .. %W[foo]
foo => %q[foo] .. %q[foo]
foo => %Q[foo] .. %Q[foo]
foo => "foo" .. "foo"
foo => nil .. nil
foo => self .. self
foo => true .. true
foo => false .. false
foo => __FILE__ .. __FILE__
foo => __LINE__ .. __LINE__
foo => __ENCODING__ .. __ENCODING__
foo => -> { bar } .. -> { bar }
foo => ^bar
foo => ^@bar
foo => ^@@bar
foo => ^$bar
foo => ^(1)
foo => ^(nil)
foo => ^("bar" + "baz")
foo => Foo
foo => Foo::Bar::Baz
foo => ::Foo
foo => ::Foo::Bar::Baz
foo => Foo()
foo => Foo(1)
foo => Foo(1, 2, 3)
foo => Foo(bar)
foo => Foo(*bar, baz)
foo => Foo(bar, *baz)
foo => Foo(*bar, baz, *qux)
foo => Foo[]
foo => Foo[1]
foo => Foo[1, 2, 3]
foo => Foo[bar]
foo => Foo[*bar, baz]
foo => Foo[bar, *baz]
foo => Foo[*bar, baz, *qux]
foo => *bar
foo => *bar, baz, qux
foo => bar, *baz, qux
foo => bar, baz, *qux
foo => *bar, baz, *qux
foo => []
foo => [[[[[]]]]]
foo => [*bar]
foo => [*bar, baz, qux]
foo => [bar, *baz, qux]
foo => [bar, baz, *qux]
foo => [*bar, baz, *qux]
foo in bar
foo in 1
foo in 1.0
foo in 1i
foo in 1r
foo in :foo
foo in %s[foo]
foo in :"foo"
foo in /foo/
foo in `foo`
foo in %x[foo]
foo in %i[foo]
foo in %I[foo]
foo in %w[foo]
foo in %W[foo]
foo in %q[foo]
foo in %Q[foo]
foo in "foo"
foo in nil
foo in self
foo in true
foo in false
foo in __FILE__
foo in __LINE__
foo in __ENCODING__
foo in -> { bar }
case foo; in bar then end
case foo; in 1 then end
case foo; in 1.0 then end
case foo; in 1i then end
case foo; in 1r then end
case foo; in :foo then end
case foo; in %s[foo] then end
case foo; in :"foo" then end
case foo; in /foo/ then end
case foo; in `foo` then end
case foo; in %x[foo] then end
case foo; in %i[foo] then end
case foo; in %I[foo] then end
case foo; in %w[foo] then end
case foo; in %W[foo] then end
case foo; in %q[foo] then end
case foo; in %Q[foo] then end
case foo; in "foo" then end
case foo; in nil then end
case foo; in self then end
case foo; in true then end
case foo; in false then end
case foo; in __FILE__ then end
case foo; in __LINE__ then end
case foo; in __ENCODING__ then end
case foo; in -> { bar } then end
case foo; in bar if baz then end
case foo; in 1 if baz then end
case foo; in 1.0 if baz then end
case foo; in 1i if baz then end
case foo; in 1r if baz then end
case foo; in :foo if baz then end
case foo; in %s[foo] if baz then end
case foo; in :"foo" if baz then end
case foo; in /foo/ if baz then end
case foo; in `foo` if baz then end
case foo; in %x[foo] if baz then end
case foo; in %i[foo] if baz then end
case foo; in %I[foo] if baz then end
case foo; in %w[foo] if baz then end
case foo; in %W[foo] if baz then end
case foo; in %q[foo] if baz then end
case foo; in %Q[foo] if baz then end
case foo; in "foo" if baz then end
case foo; in nil if baz then end
case foo; in self if baz then end
case foo; in true if baz then end
case foo; in false if baz then end
case foo; in __FILE__ if baz then end
case foo; in __LINE__ if baz then end
case foo; in __ENCODING__ if baz then end
case foo; in -> { bar } if baz then end
if a in []
end
a => [
b
]
foo in A[
bar: B[
value: a
]
]

View File

@ -0,0 +1,27 @@
-> (a; b, c, d) { b }
-> do
ensure
end
-> do
rescue
else
ensure
end
-> { foo }
-> do; foo; end
-> a, b = 1, c:, d:, &e { a }
-> (a, b = 1, *c, d:, e:, **f, &g) { a }
-> (a, b = 1, *c, d:, e:, **f, &g) do
a
end
-> (a) { -> b { a * b } }
-> ((a, b), *c) { }

View File

@ -0,0 +1 @@
...2

View File

@ -0,0 +1 @@
..2

View File

@ -0,0 +1 @@
2...

View File

@ -0,0 +1 @@
2..

View File

@ -0,0 +1,17 @@
(...2)
(..2)
1...2
foo[...2]
{ foo: ...bar }
(1...)
1..2
{ foo: ..bar }
(1..)

View File

@ -0,0 +1,28 @@
foo /bar/
%r{abc}i
/a\b/
/aaa #$bbb/
/aaa #{bbb} ccc/
[/(?<foo>bar)/ =~ baz, foo]
/abc/i
%r/[a-z$._?][\w$.?#@~]*:/i
%r/([a-z$._?][\w$.?#@~]*)(\s+)(equ)/i
%r/[a-z$._?][\w$.?#@~]*/i
%r(
(?:[\w#$%_']|\(\)|\(,\)|\[\]|[0-9])*
(?:[\w#$%_']+)
)
/(?#\))/ =~ "hi"
%r#pound#

View File

@ -0,0 +1,31 @@
foo rescue nil
foo rescue
nil
break rescue nil
next rescue nil
return rescue nil
foo rescue nil || 1
foo rescue nil ? 1 : 2
begin; a; rescue *b; end
foo do |x|
bar(y) rescue ArgumentError fail "baz"
end
if a = foo rescue nil
bar
end
def some_method = other_method 42 rescue nil
def a
a b:
rescue
end

View File

@ -0,0 +1,24 @@
return
return (1), (2), (3)
return *1
return 1
return 1, 2,
3
return 1, 2, 3
return [1, 2, 3]
return(
1
2
)
return()
return(1)

View File

@ -0,0 +1 @@
BEGIN { 42 }

View File

@ -0,0 +1,113 @@
= ruby_parser
home :: https://github.com/seattlerb/ruby_parser
bugs :: https://github.com/seattlerb/ruby_parser/issues
rdoc :: http://docs.seattlerb.org/ruby_parser
== DESCRIPTION:
ruby_parser (RP) is a ruby parser written in pure ruby (utilizing
racc--which does by default use a C extension). It outputs
s-expressions which can be manipulated and converted back to ruby via
the ruby2ruby gem.
As an example:
def conditional1 arg1
return 1 if arg1 == 0
return 0
end
becomes:
s(:defn, :conditional1, s(:args, :arg1),
s(:if,
s(:call, s(:lvar, :arg1), :==, s(:lit, 0)),
s(:return, s(:lit, 1)),
nil),
s(:return, s(:lit, 0)))
Tested against 801,039 files from the latest of all rubygems (as of 2013-05):
* 1.8 parser is at 99.9739% accuracy, 3.651 sigma
* 1.9 parser is at 99.9940% accuracy, 4.013 sigma
* 2.0 parser is at 99.9939% accuracy, 4.008 sigma
* 2.6 parser is at 99.9972% accuracy, 4.191 sigma
* 3.0 parser has a 100% parse rate.
* Tested against 2,672,412 unique ruby files across 167k gems.
* As do all the others now, basically.
== FEATURES/PROBLEMS:
* Pure ruby, no compiles.
* Includes preceding comment data for defn/defs/class/module nodes!
* Incredibly simple interface.
* Output is 100% equivalent to ParseTree.
* Can utilize PT's SexpProcessor and UnifiedRuby for language processing.
* Known Issue: Speed is now pretty good, but can always improve:
* RP parses a corpus of 3702 files in 125s (avg 108 Kb/s)
* MRI+PT parsed the same in 67.38s (avg 200.89 Kb/s)
* Known Issue: Code is much better, but still has a long way to go.
* Known Issue: Totally awesome.
* Known Issue: line number values can be slightly off. Parsing LR sucks.
== SYNOPSIS:
RubyParser.new.parse "1+1"
# => s(:call, s(:lit, 1), :+, s(:lit, 1))
You can also use Ruby19Parser, Ruby18Parser, or RubyParser.for_current_ruby:
RubyParser.for_current_ruby.parse "1+1"
# => s(:call, s(:lit, 1), :+, s(:lit, 1))
== DEVELOPER NOTES:
To add a new version:
* New parser should be generated from lib/ruby[3]_parser.yy.
* Extend lib/ruby[3]_parser.yy with new class name.
* Add new version number to V2/V3 in Rakefile for rule creation.
* Add new `ruby_parse "x.y.z"` line to Rakefile for rake compare (line ~300).
* Require generated parser in lib/ruby_parser.rb.
* Add new V## = ::Ruby##Parser; end to ruby_parser.rb (bottom of file).
* Add empty TestRubyParserShared##Plus module and TestRubyParserV## to test/test_ruby_parser.rb.
* Extend Manifest.txt with generated file names.
* Add new version number to sexp_processor's pt_testcase.rb in all_versions
Until all of these are done, you won't have a clean test run.
== REQUIREMENTS:
* ruby. woot.
* sexp_processor for Sexp and SexpProcessor classes, and testing.
* racc full package for parser development (compiling .y to .rb).
== INSTALL:
* sudo gem install ruby_parser
== LICENSE:
(The MIT License)
Copyright (c) Ryan Davis, seattle.rb
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -0,0 +1 @@
__ENCODING__

View File

@ -0,0 +1 @@
alias $MATCH $&

View File

@ -0,0 +1 @@
alias in out

View File

@ -0,0 +1,3 @@
true and
not false and
true

View File

@ -0,0 +1 @@
[1 => 2]

View File

@ -0,0 +1 @@
[1, 2 => 3]

View File

@ -0,0 +1 @@
def f(a: 1, &b); end

View File

@ -0,0 +1,4 @@
[
'a',
'b']
1

View File

@ -0,0 +1,3 @@
%w[].b
[].b

View File

@ -0,0 +1 @@
{ y: }

View File

@ -0,0 +1 @@
a(b:1)

View File

@ -0,0 +1 @@
A::b = 1

View File

@ -0,0 +1 @@
a[[1, 2]] = 3

View File

@ -0,0 +1 @@
[1, 2, 3, 4][from .. to] = ["a", "b", "c"]

View File

@ -0,0 +1 @@
a.B = 1

View File

@ -0,0 +1 @@
x `#{y}`

View File

@ -0,0 +1 @@
1 != 2

View File

@ -0,0 +1,3 @@
..10
; ..a
; c

View File

@ -0,0 +1,3 @@
...10
; ...a
; c

View File

@ -0,0 +1,3 @@
begin
ensure
end

View File

@ -0,0 +1,9 @@
begin
1
rescue
2
else
3
ensure
4
end

View File

@ -0,0 +1,9 @@
begin
rescue
else
ensure
end

View File

@ -0,0 +1,4 @@
begin
rescue
ensure
end

View File

@ -0,0 +1 @@
def x(&); end

View File

@ -0,0 +1 @@
a { |**b| }

View File

@ -0,0 +1 @@
a { |b, c=1, d, &e| }

View File

@ -0,0 +1 @@
a { |b, c = 1, *d| }

View File

@ -0,0 +1 @@
a { |b, c=1, *d, e, &f| }

View File

@ -0,0 +1 @@
a { |b = 1| }

View File

@ -0,0 +1 @@
a { |b; c| }

View File

@ -0,0 +1 @@
a {|b; c, d| }

View File

@ -0,0 +1 @@
a { |b, *c, d| }

View File

@ -0,0 +1 @@
f { |**kwargs| kwargs }

View File

@ -0,0 +1 @@
f { |**nil| }

View File

@ -0,0 +1 @@
f { |a, b = 42| [a, b] }

View File

@ -0,0 +1 @@
a { | b=1, c=2 | }

View File

@ -0,0 +1 @@
f { |a, b = 42, c = 24| [a, b, c] }

View File

@ -0,0 +1 @@
f { |a, b = 42, c = 24, &d| [a, b, c, d] }

View File

@ -0,0 +1 @@
break foo arg do |bar| end

View File

@ -0,0 +1,4 @@
a def b(c)
d
end
e.f do end

Some files were not shown because too many files have changed in this diff Show More