Manual YARP resync

This commit is contained in:
Jemma Issroff 2023-06-30 14:30:24 -04:00
parent 6f9d1b4b0f
commit bfb933371d
Notes: git 2023-07-05 20:59:16 +00:00
77 changed files with 5222 additions and 5751 deletions

View File

@ -1,17 +1,79 @@
# frozen_string_literal: true
module YARP
# This represents a location in the source corresponding to a node or token.
class Location
attr_reader :start_offset, :length
# This represents a source of Ruby code that has been parsed. It is used in
# conjunction with locations to allow them to resolve line numbers and source
# ranges.
class Source
attr_reader :source, :offsets
def initialize(start_offset, length)
# Keep the source text and the list of offsets that #line and #column search
# when translating a byte offset.
def initialize(source, offsets)
  @source, @offsets = source, offsets
end
# Return +length+ bytes of the source, starting at byte +offset+ (delegates
# to byteslice, so both arguments are counted in bytes, not characters).
def slice(offset, length) = source.byteslice(offset, length)
# Binary-search +offsets+ for the first entry greater than +value+ and return
# its index; when no entry is greater, return offsets.length instead. With
# offsets holding the starting byte offset of every line (first entry 0),
# the result is the 1-based line number containing +value+.
def line(value)
  index = offsets.bsearch_index { |line_start| line_start > value }
  index.nil? ? offsets.length : index
end
# Return how many bytes +value+ lies past the offsets entry for its line,
# i.e. the entry at index line(value) - 1.
def column(value)
  line_start = offsets[line(value) - 1]
  value - line_start
end
end
# This represents a location in the source.
class Location
# A Source object that is used to determine more information from the given
# offset and length.
private attr_reader :source
# The byte offset from the beginning of the source where this location
# starts.
attr_reader :start_offset
# The length of this location in bytes.
attr_reader :length
# Build a location from the source object it resolves against, the byte
# offset where it starts, and its length in bytes.
def initialize(source, start_offset, length)
  @source, @start_offset, @length = source, start_offset, length
end
# The source code that this location represents.
# Delegates to the source object, asking for +length+ bytes from
# +start_offset+.
def slice = source.slice(start_offset, length)
# The byte offset from the beginning of the source where this location ends.
def end_offset
@start_offset + @length
start_offset + length
end
# The line number where this location starts.
# Resolved by asking the source object which line holds start_offset.
def start_line = source.line(start_offset)
# The line number where this location ends.
# The line is resolved from the last byte covered by the location
# (end_offset - 1). A zero-length location covers no byte, so probing
# end_offset - 1 would inspect the byte *before* it: the previous line when
# the location sits at the start of a line, or offset -1 (line 0) at the very
# beginning of the source. For that case the start offset is used instead, so
# an empty location ends on the line where it starts.
def end_line
  probe = length.zero? ? start_offset : end_offset - 1
  source.line(probe)
end
# The column number in bytes where this location starts from the start of
# the line.
# Resolved by asking the source object for the column of start_offset.
def start_column = source.column(start_offset)
# The column number in bytes where this location ends from the start of the
# line.
# The column is resolved from the last byte covered by the location
# (end_offset - 1). A zero-length location covers no byte, so probing
# end_offset - 1 would resolve the byte *before* it, which yields a column on
# the previous line, or a negative value at the very beginning of the source.
# For that case the start offset is used instead, so an empty location ends at
# the column where it starts.
def end_column
  probe = length.zero? ? start_offset : end_offset - 1
  source.column(probe)
end
def deconstruct_keys(keys)
@ -101,21 +163,12 @@ module YARP
# This represents a token from the Ruby source.
class Token
attr_reader :type, :value, :start_offset, :length
attr_reader :type, :value, :location
def initialize(type, value, start_offset, length)
def initialize(type, value, location)
@type = type
@value = value
@start_offset = start_offset
@length = length
end
# Offset just past the token: its start offset plus its length.
def end_offset = @start_offset + @length
def location
Location.new(@start_offset, @length)
@location = location
end
def deconstruct_keys(keys)
@ -143,20 +196,12 @@ module YARP
# This represents a node in the tree.
class Node
attr_reader :start_offset, :length
# Offset just past the node: its start offset plus its length.
def end_offset = @start_offset + @length
# Build a new Location from this node's start offset and length on every
# call (nothing is cached).
def location = Location.new(@start_offset, @length)
attr_reader :location
def pretty_print(q)
q.group do
q.text(self.class.name.split("::").last)
self.location.pretty_print(q)
location.pretty_print(q)
q.text("(")
q.nest(2) do
deconstructed = deconstruct_keys([])
@ -171,67 +216,10 @@ module YARP
end
end
# A class that knows how to walk down the tree. None of the individual visit
# methods are implemented on this visitor, so it forces the consumer to
# implement each one that they need. For a default implementation that
# continues walking the tree, see the Visitor class.
class BasicVisitor
  # Hand this visitor to the node's accept method and return whatever it
  # returns. A nil node is skipped and yields nil.
  def visit(node)
    return if node.nil?

    node.accept(self)
  end

  # Visit every entry of +nodes+ in order and collect the results in an array.
  def visit_all(nodes)
    nodes.map { |child| visit(child) }
  end

  # Visit each of the node's child_nodes and return the collected results.
  def visit_child_nodes(node)
    children = node.child_nodes
    visit_all(children)
  end
end
# This lexes with the Ripper lex. It drops any space events but otherwise
# returns the same tokens.
# [raises SyntaxError] if the syntax in source is invalid
# Lex +source+ with Ripper.lex (raise_errors: true) and post-process the
# token list:
#
# * :on_sp tokens are dropped;
# * a :on_tstring_content token whose text starts with "#$" or "#@" is
#   appended onto the last kept token when that token is also
#   :on_tstring_content;
# * a :on_words_sep token is appended onto the last kept token when that
#   token is also :on_words_sep.
#
# Merging appends to the earlier token's string in place. Returns the array
# of kept tokens.
def self.lex_ripper(source)
  last_kept = []
  kept = []

  Ripper.lex(source, raise_errors: true).each do |entry|
    event = entry[1]
    next if event == :on_sp

    mergeable =
      case event
      when :on_tstring_content
        last_kept[1] == :on_tstring_content && entry[2].start_with?("\#$", "\#@")
      when :on_words_sep
        last_kept[1] == :on_words_sep
      else
        false
      end

    if mergeable
      last_kept[2] << entry[2]
    else
      kept << entry
      last_kept = entry
    end
  end

  kept
end
# Load the serialized AST using the source as a reference into a tree.
# Thin wrapper: forwards both arguments unchanged to Serialize.load and
# returns its result.
def self.load(source, serialized) = Serialize.load(source, serialized)
# Forwards to _parse, supplying nil for +filepath+ when the caller omits it.
def self.parse(source, filepath = nil) = _parse(source, filepath)
end
require_relative "yarp/lex_compat"
@ -240,9 +228,3 @@ require_relative "yarp/ripper_compat"
require_relative "yarp/serialize"
require_relative "yarp/pack"
require "yarp.so"
module YARP
class << self
private :_parse
end
end

View File

@ -534,12 +534,11 @@ module YARP
end
end
attr_reader :source, :offsets, :filepath
attr_reader :source, :filepath
def initialize(source, filepath = "")
@source = source
@filepath = filepath || ""
@offsets = find_offsets(source)
end
def result
@ -561,7 +560,8 @@ module YARP
result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
result_value.each_with_index do |(token, lex_state), index|
(lineno, column) = find_location(token.location.start_offset)
lineno = token.location.start_line
column = token.location.start_column
column -= index == 0 ? 6 : 3 if bom && lineno == 1
event = RIPPER.fetch(token.type)
@ -702,38 +702,6 @@ module YARP
ParseResult.new(tokens, result.comments, result.errors, result.warnings)
end
private
# YARP keeps locations around in the form of ranges of byte offsets from the
# start of the file. Ripper keeps locations around in the form of line and
# column numbers. To match the output, we keep a cache of the offsets at the
# beginning of each line.
# Returns an array that starts with 0 and then holds, for every line of
# +source+, the running total of bytes consumed once that line has been read.
def find_offsets(source)
  source.each_line.each_with_object([0]) do |line, boundaries|
    boundaries << (boundaries.last + line.bytesize)
  end
end
# Given a byte offset, find the line number and column number that it maps
# to. We use a binary search over the cached offsets to find the line number
# that the offset is on, and then subtract the offset of the previous line
# to find the column number.
# Returns a [line_number, column] pair for the byte offset +value+.
#
# The line number is the index of the first cached offset greater than
# +value+; the column is +value+ minus the offset stored just before that
# index. When no cached offset is greater, the pair is computed from the end
# of the cache instead: offsets.length - 1 and +value+ minus the last offset.
def find_location(value)
  index = offsets.bsearch_index { |offset| offset > value }
  return [offsets.length - 1, value - offsets.last] if index.nil?

  [index, value - offsets[index - 1]]
end
end
# The constant that wraps the behavior of the lexer to match Ripper's output
@ -746,4 +714,39 @@ module YARP
# Build a LexCompat for +source+ (with +filepath+ defaulting to an empty
# string) and return what its #result method produces.
def self.lex_compat(source, filepath = "")
  compat = LexCompat.new(source, filepath)
  compat.result
end
# This lexes with the Ripper lex. It drops any space events but otherwise
# returns the same tokens. Raises SyntaxError if the syntax in source is
# invalid.
def self.lex_ripper(source)
  # The most recent token that was actually kept; merges append onto it.
  last_kept = []
  kept = []

  Ripper.lex(source, raise_errors: true).each do |entry|
    event = entry[1]
    # Space events never reach the output and never reset last_kept.
    next if event == :on_sp

    mergeable =
      case event
      when :on_tstring_content
        # String content starting with "#$" or "#@" is folded into directly
        # preceding string content.
        last_kept[1] == :on_tstring_content && entry[2].start_with?("\#$", "\#@")
      when :on_words_sep
        # Consecutive word separators collapse into one token.
        last_kept[1] == :on_words_sep
      else
        false
      end

    if mergeable
      # Appends in place, so the token already stored in kept grows.
      last_kept[2] << entry[2]
    else
      kept << entry
      last_kept = entry
    end
  end

  kept
end
end

File diff suppressed because it is too large Load Diff

View File

@ -9,26 +9,30 @@ require "stringio"
module YARP
module Serialize
def self.load(source, serialized)
def self.load(input, serialized)
io = StringIO.new(serialized)
io.set_encoding(Encoding::BINARY)
Loader.new(source, serialized, io).load
Loader.new(input, serialized, io).load
end
class Loader
attr_reader :encoding, :source, :serialized, :io
attr_reader :constant_pool_offset, :constant_pool
attr_reader :encoding, :input, :serialized, :io
attr_reader :constant_pool_offset, :constant_pool, :source
def initialize(source, serialized, io)
def initialize(input, serialized, io)
@encoding = Encoding::UTF_8
@source = source.dup
@input = input.dup
@serialized = serialized
@io = io
@constant_pool_offset = nil
@constant_pool = nil
offsets = [0]
input.b.scan("\n") { offsets << $~.end(0) }
@source = Source.new(input, offsets)
end
def load
@ -36,7 +40,7 @@ module YARP
io.read(3).unpack("C3") => [0, 4, 0]
@encoding = Encoding.find(io.read(load_varint))
@source = source.force_encoding(@encoding).freeze
@input = input.force_encoding(@encoding).freeze
@constant_pool_offset = io.read(4).unpack1("L")
@constant_pool = Array.new(load_varint, nil)
@ -78,7 +82,7 @@ module YARP
end
def load_location
Location.new(load_varint, load_varint)
Location.new(source, load_varint, load_varint)
end
def load_optional_location
@ -95,7 +99,7 @@ module YARP
start = serialized.unpack1("L", offset: offset)
length = serialized.unpack1("L", offset: offset + 4)
constant = source.byteslice(start, length).to_sym
constant = input.byteslice(start, length).to_sym
constant_pool[index] = constant
end
@ -104,262 +108,262 @@ module YARP
def load_node
type = io.getbyte
start_offset, length = load_varint, load_varint
location = load_location
case type
when 1 then
AliasNode.new(load_node, load_node, load_location, start_offset, length)
AliasNode.new(load_node, load_node, load_location, location)
when 2 then
AlternationPatternNode.new(load_node, load_node, load_location, start_offset, length)
AlternationPatternNode.new(load_node, load_node, load_location, location)
when 3 then
AndNode.new(load_node, load_node, load_location, start_offset, length)
AndNode.new(load_node, load_node, load_location, location)
when 4 then
ArgumentsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
ArgumentsNode.new(Array.new(load_varint) { load_node }, location)
when 5 then
ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
when 6 then
ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
when 7 then
AssocNode.new(load_node, load_optional_node, load_optional_location, start_offset, length)
AssocNode.new(load_node, load_optional_node, load_optional_location, location)
when 8 then
AssocSplatNode.new(load_optional_node, load_location, start_offset, length)
AssocSplatNode.new(load_optional_node, load_location, location)
when 9 then
BackReferenceReadNode.new(start_offset, length)
BackReferenceReadNode.new(location)
when 10 then
BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, location)
when 11 then
BlockArgumentNode.new(load_optional_node, load_location, start_offset, length)
BlockArgumentNode.new(load_optional_node, load_location, location)
when 12 then
BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, start_offset, length)
BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, location)
when 13 then
BlockParameterNode.new(load_optional_location, load_location, start_offset, length)
BlockParameterNode.new(load_optional_location, load_location, location)
when 14 then
BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, start_offset, length)
BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, location)
when 15 then
BreakNode.new(load_optional_node, load_location, start_offset, length)
BreakNode.new(load_optional_node, load_location, location)
when 16 then
CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, start_offset, length)
CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, location)
when 17 then
CallOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
CallOperatorAndWriteNode.new(load_node, load_location, load_node, location)
when 18 then
CallOperatorOrWriteNode.new(load_node, load_node, load_location, start_offset, length)
CallOperatorOrWriteNode.new(load_node, load_node, load_location, location)
when 19 then
CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
when 20 then
CapturePatternNode.new(load_node, load_node, load_location, start_offset, length)
CapturePatternNode.new(load_node, load_node, load_location, location)
when 21 then
CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, start_offset, length)
CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, location)
when 22 then
ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, start_offset, length)
ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, location)
when 23 then
ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
when 24 then
ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
when 25 then
ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 26 then
ClassVariableReadNode.new(start_offset, length)
ClassVariableReadNode.new(location)
when 27 then
ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
when 28 then
ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, location)
when 29 then
ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, location)
when 30 then
ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 31 then
ConstantPathNode.new(load_optional_node, load_node, load_location, start_offset, length)
ConstantPathNode.new(load_optional_node, load_node, load_location, location)
when 32 then
ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, location)
when 33 then
ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, start_offset, length)
ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, location)
when 34 then
ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
when 35 then
ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, start_offset, length)
ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, location)
when 36 then
ConstantReadNode.new(start_offset, length)
ConstantReadNode.new(location)
when 37 then
load_serialized_length
DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, start_offset, length)
DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, location)
when 38 then
DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, start_offset, length)
DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, location)
when 39 then
ElseNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
ElseNode.new(load_location, load_optional_node, load_optional_location, location)
when 40 then
EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, start_offset, length)
EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, location)
when 41 then
EmbeddedVariableNode.new(load_location, load_node, start_offset, length)
EmbeddedVariableNode.new(load_location, load_node, location)
when 42 then
EnsureNode.new(load_location, load_optional_node, load_location, start_offset, length)
EnsureNode.new(load_location, load_optional_node, load_location, location)
when 43 then
FalseNode.new(start_offset, length)
FalseNode.new(location)
when 44 then
FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, start_offset, length)
FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, location)
when 45 then
FloatNode.new(start_offset, length)
FloatNode.new(location)
when 46 then
ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, start_offset, length)
ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, location)
when 47 then
ForwardingArgumentsNode.new(start_offset, length)
ForwardingArgumentsNode.new(location)
when 48 then
ForwardingParameterNode.new(start_offset, length)
ForwardingParameterNode.new(location)
when 49 then
ForwardingSuperNode.new(load_optional_node, start_offset, length)
ForwardingSuperNode.new(load_optional_node, location)
when 50 then
GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
when 51 then
GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
when 52 then
GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 53 then
GlobalVariableReadNode.new(start_offset, length)
GlobalVariableReadNode.new(location)
when 54 then
GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, start_offset, length)
GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, location)
when 55 then
HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
when 56 then
HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, location)
when 57 then
IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
when 58 then
ImaginaryNode.new(load_node, start_offset, length)
ImaginaryNode.new(load_node, location)
when 59 then
InNode.new(load_node, load_optional_node, load_location, load_optional_location, start_offset, length)
InNode.new(load_node, load_optional_node, load_location, load_optional_location, location)
when 60 then
InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
when 61 then
InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
when 62 then
InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 63 then
InstanceVariableReadNode.new(start_offset, length)
InstanceVariableReadNode.new(location)
when 64 then
InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
when 65 then
IntegerNode.new(start_offset, length)
IntegerNode.new(location)
when 66 then
InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, start_offset, length)
InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, location)
when 67 then
InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
when 68 then
InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
when 69 then
InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
when 70 then
KeywordHashNode.new(Array.new(load_varint) { load_node }, start_offset, length)
KeywordHashNode.new(Array.new(load_varint) { load_node }, location)
when 71 then
KeywordParameterNode.new(load_location, load_optional_node, start_offset, length)
KeywordParameterNode.new(load_location, load_optional_node, location)
when 72 then
KeywordRestParameterNode.new(load_location, load_optional_location, start_offset, length)
KeywordRestParameterNode.new(load_location, load_optional_location, location)
when 73 then
LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, start_offset, length)
LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, location)
when 74 then
LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 75 then
LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 76 then
LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, start_offset, length)
LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, location)
when 77 then
LocalVariableReadNode.new(load_constant, load_varint, start_offset, length)
LocalVariableReadNode.new(load_constant, load_varint, location)
when 78 then
LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, start_offset, length)
LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, location)
when 79 then
MatchPredicateNode.new(load_node, load_node, load_location, start_offset, length)
MatchPredicateNode.new(load_node, load_node, load_location, location)
when 80 then
MatchRequiredNode.new(load_node, load_node, load_location, start_offset, length)
MatchRequiredNode.new(load_node, load_node, load_location, location)
when 81 then
MissingNode.new(start_offset, length)
MissingNode.new(location)
when 82 then
ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, start_offset, length)
ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, location)
when 83 then
MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, location)
when 84 then
NextNode.new(load_optional_node, load_location, start_offset, length)
NextNode.new(load_optional_node, load_location, location)
when 85 then
NilNode.new(start_offset, length)
NilNode.new(location)
when 86 then
NoKeywordsParameterNode.new(load_location, load_location, start_offset, length)
NoKeywordsParameterNode.new(load_location, load_location, location)
when 87 then
NumberedReferenceReadNode.new(start_offset, length)
NumberedReferenceReadNode.new(location)
when 88 then
OptionalParameterNode.new(load_constant, load_location, load_location, load_node, start_offset, length)
OptionalParameterNode.new(load_constant, load_location, load_location, load_node, location)
when 89 then
OrNode.new(load_node, load_node, load_location, start_offset, length)
OrNode.new(load_node, load_node, load_location, location)
when 90 then
ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, start_offset, length)
ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, location)
when 91 then
ParenthesesNode.new(load_optional_node, load_location, load_location, start_offset, length)
ParenthesesNode.new(load_optional_node, load_location, load_location, location)
when 92 then
PinnedExpressionNode.new(load_node, load_location, load_location, load_location, start_offset, length)
PinnedExpressionNode.new(load_node, load_location, load_location, load_location, location)
when 93 then
PinnedVariableNode.new(load_node, load_location, start_offset, length)
PinnedVariableNode.new(load_node, load_location, location)
when 94 then
PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
when 95 then
PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
when 96 then
ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, start_offset, length)
ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, location)
when 97 then
RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, start_offset, length)
RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, location)
when 98 then
RationalNode.new(load_node, start_offset, length)
RationalNode.new(load_node, location)
when 99 then
RedoNode.new(start_offset, length)
RedoNode.new(location)
when 100 then
RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, start_offset, length)
RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, location)
when 101 then
RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, start_offset, length)
RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, location)
when 102 then
RequiredParameterNode.new(load_constant, start_offset, length)
RequiredParameterNode.new(load_constant, location)
when 103 then
RescueModifierNode.new(load_node, load_location, load_node, start_offset, length)
RescueModifierNode.new(load_node, load_location, load_node, location)
when 104 then
RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, start_offset, length)
RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, location)
when 105 then
RestParameterNode.new(load_location, load_optional_location, start_offset, length)
RestParameterNode.new(load_location, load_optional_location, location)
when 106 then
RetryNode.new(start_offset, length)
RetryNode.new(location)
when 107 then
ReturnNode.new(load_location, load_optional_node, start_offset, length)
ReturnNode.new(load_location, load_optional_node, location)
when 108 then
SelfNode.new(start_offset, length)
SelfNode.new(location)
when 109 then
SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, start_offset, length)
SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, location)
when 110 then
SourceEncodingNode.new(start_offset, length)
SourceEncodingNode.new(location)
when 111 then
SourceFileNode.new(load_string, start_offset, length)
SourceFileNode.new(load_string, location)
when 112 then
SourceLineNode.new(start_offset, length)
SourceLineNode.new(location)
when 113 then
SplatNode.new(load_location, load_optional_node, start_offset, length)
SplatNode.new(load_location, load_optional_node, location)
when 114 then
StatementsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
StatementsNode.new(Array.new(load_varint) { load_node }, location)
when 115 then
StringConcatNode.new(load_node, load_node, start_offset, length)
StringConcatNode.new(load_node, load_node, location)
when 116 then
StringNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
StringNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
when 117 then
SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, start_offset, length)
SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, location)
when 118 then
SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
when 119 then
TrueNode.new(start_offset, length)
TrueNode.new(location)
when 120 then
UndefNode.new(Array.new(load_varint) { load_node }, load_location, start_offset, length)
UndefNode.new(Array.new(load_varint) { load_node }, load_location, location)
when 121 then
UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
when 122 then
UntilNode.new(load_location, load_node, load_optional_node, start_offset, length)
UntilNode.new(load_location, load_node, load_optional_node, location)
when 123 then
WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, start_offset, length)
WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, location)
when 124 then
WhileNode.new(load_location, load_node, load_optional_node, start_offset, length)
WhileNode.new(load_location, load_node, load_optional_node, location)
when 125 then
XStringNode.new(load_location, load_location, load_location, load_string, start_offset, length)
XStringNode.new(load_location, load_location, load_location, load_string, location)
when 126 then
YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, start_offset, length)
YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, location)
end
end
end

View File

@ -1,212 +0,0 @@
# frozen_string_literal: true
require "yarp_test_helper"
class CompileTest < Test::Unit::TestCase
def test_AliasNode
assert_compiles("alias foo bar")
end
def test_AndNode
assert_compiles("true && false")
end
def test_ArrayNode
assert_compiles("[]")
assert_compiles("[foo, bar, baz]")
end
def test_AssocNode
assert_compiles("{ foo: bar }")
end
def test_BlockNode
assert_compiles("foo { bar }")
end
def test_BlockNode_with_optionals
assert_compiles("foo { |x = 1| bar }")
end
def test_CallNode
assert_compiles("foo")
assert_compiles("foo(bar)")
end
def test_ClassVariableReadNode
assert_compiles("@@foo")
end
def test_ClassVariableWriteNode
assert_compiles("@@foo = 1")
end
def test_FalseNode
assert_compiles("false")
end
def test_GlobalVariableReadNode
assert_compiles("$foo")
end
def test_GlobalVariableWriteNode
assert_compiles("$foo = 1")
end
def test_HashNode
assert_compiles("{ foo: bar }")
end
def test_InstanceVariableReadNode
assert_compiles("@foo")
end
def test_InstanceVariableWriteNode
assert_compiles("@foo = 1")
end
def test_IntegerNode
assert_compiles("1")
assert_compiles("1_000")
end
def test_InterpolatedStringNode
assert_compiles("\"foo \#{bar} baz\"")
end
def test_LocalVariableWriteNode
assert_compiles("foo = 1")
end
def test_LocalVariableReadNode
assert_compiles("[foo = 1, foo]")
end
def test_NilNode
assert_compiles("nil")
end
def test_OrNode
assert_compiles("true || false")
end
def test_ParenthesesNode
assert_compiles("()")
end
def test_ProgramNode
assert_compiles("")
end
def test_RangeNode
assert_compiles("foo..bar")
assert_compiles("foo...bar")
assert_compiles("(foo..)")
assert_compiles("(foo...)")
assert_compiles("(..bar)")
assert_compiles("(...bar)")
end
def test_SelfNode
assert_compiles("self")
end
def test_StringNode
assert_compiles("\"foo\"")
end
def test_SymbolNode
assert_compiles(":foo")
end
def test_TrueNode
assert_compiles("true")
end
def test_UndefNode
assert_compiles("undef :foo, :bar, :baz")
end
def test_XStringNode
assert_compiles("`foo`")
end
private
# Compiles the given source with both CRuby (via rubyvm_compile) and YARP,
# then checks the two instruction sequences against each other using
# assert_equal_iseqs.
def assert_compiles(source)
  expected = rubyvm_compile(source)
  actual = YARP.compile(source)

  assert_equal_iseqs(expected, actual)
end
# Serialized instruction sequences are arrays with 14 entries (indices 0
# through 13). Not every field is supported yet, so the two arrays cannot be
# compared wholesale. Instead each supported entry is compared on its own and
# the unsupported ones are only type-checked.
#
# Note that this consumes actual[13] (it is shifted as instructions are
# matched) and recurses into the iseqs of blocks attached to send
# instructions.
def assert_equal_iseqs(expected, actual)
  # Index 0 is the magic comment string.
  assert_equal expected[0], actual[0]

  # Indices 1 through 3 are the major, minor, and patch version numbers.
  # TODO: Insert this check once Ruby 3.3 is released, and the TruffleRuby
  # GitHub workflow also checks against Ruby 3.3
  # assert_equal expected[1...4], actual[1...4]

  # Index 4 is a hash of options for the iseq. Some of its entries are
  # supported and compared, the rest are only checked for their type or length.
  expected_options = expected[4]
  actual_options = actual[4]

  assert_equal expected_options[:arg_size], actual_options[:arg_size], "Unexpected difference in arg_size"
  assert_equal expected_options[:stack_max], actual_options[:stack_max], "Unexpected difference in stack_max"

  assert_kind_of Integer, actual_options[:local_size]
  assert_kind_of Integer, actual_options[:node_id]

  assert_equal expected_options[:code_location].length, actual_options[:code_location].length, "Unexpected difference in code_location length"
  assert_equal expected_options[:node_ids].length, actual_options[:node_ids].length, "Unexpected difference in node_ids length"

  # Indices 5 through 8 are the name of the iseq, the relative file path, the
  # absolute file path, and the line number. These are not working quite yet,
  # so only their types are checked.
  assert_kind_of String, actual[5]
  assert_kind_of String, actual[6]
  assert_kind_of String, actual[7]
  assert_kind_of Integer, actual[8]

  # Index 9 is the type of the iseq.
  assert_equal expected[9], actual[9]

  # Index 10 is the list of local variables, which is not supported yet.
  assert_kind_of Array, actual[10]

  # Index 11 holds the argument options. Block and method iseqs use these to
  # reflect how the arguments are passed.
  assert_equal expected[11], actual[11], "Unexpected difference in argument options"

  # Index 12 holds the catch table entries, which are not working yet.
  assert_kind_of Array, actual[12]

  # Index 13 holds the instructions themselves. Line numbers and most
  # tracepoint events are omitted from the actual list, so the expected list
  # drives the walk and entries are pulled off the front of the actual list
  # only when they are expected to be present.
  actual_insns = actual[13]

  expected[13].each do |expected_insn|
    case expected_insn
    in [:send, opnds, expected_block] unless expected_block.nil?
      # A send with a block carries a nested iseq, which is compared
      # recursively rather than by equality.
      actual_insns.shift => [:send, ^(opnds), actual_block]
      assert_equal_iseqs expected_block, actual_block
    in Array | :RUBY_EVENT_B_CALL | :RUBY_EVENT_B_RETURN | /^label_\d+/
      assert_equal expected_insn, actual_insns.shift
    in Integer | /^RUBY_EVENT_/
      # skip these for now
    else
      flunk "Unexpected instruction: #{expected_insn.inspect}"
    end
  end
end
# Compiles the given source with RubyVM::InstructionSequence and returns the
# result of #to_a. Every compile option listed below is explicitly switched
# off (presumably so the output stays comparable to the unoptimized
# instructions YARP emits).
def rubyvm_compile(source)
  disabled = %i[
    peephole_optimization
    specialized_instruction
    operands_unification
    instructions_unification
    frozen_string_literal
  ]

  options = disabled.to_h { |name| [name, false] }
  RubyVM::InstructionSequence.compile(source, **options).to_a
end
end

View File

@ -55,6 +55,21 @@ class EncodingTest < Test::Unit::TestCase
assert_equal Encoding.find("utf-8"), actual
end
# This test may be a little confusing. Basically when we use our strpbrk, it
# takes into account the encoding of the file. Here the file declares
# Shift_JIS and the %w list holds the two bytes 0x81 0x5c; the second byte is
# a backslash in ASCII, so this presumably guards against it being treated as
# an escape. The element is expected to come back as exactly those two bytes.
def test_strpbrk_multibyte
  source = <<~RUBY
    # encoding: Shift_JIS
    %w[\x81\x5c]
  RUBY

  result = YARP.parse(source)
  assert(result.errors.empty?)

  expected = (+"\x81\x5c").force_encoding(Encoding::Shift_JIS)
  element = result.value.statements.body.first.elements.first
  assert_equal(expected, element.unescaped)
end
def test_utf_8_variations
%w[
utf-8-unix

View File

@ -18,3 +18,16 @@ not foo and
bar
not(foo
)
not(
foo
)

View File

@ -76,6 +76,7 @@ foo => Foo(*bar, baz, *qux)
foo => Foo[]
foo => Foo[1]
foo => Foo[1, 2, 3]
foo => Foo[Foo[]]
foo => Foo[bar]
foo => Foo[*bar, baz]
foo => Foo[bar, *baz]

View File

@ -1,363 +0,0 @@
# frozen_string_literal: true
require_relative "yarp_test_helper"
require "yarp/language_server"
module YARP
class LanguageServerTest < Test::Unit::TestCase
# Structural patterns used to assert on the shape of decoded responses.
# Request[...] converts nested hash/array literals into Shape and Tuple
# patterns that are then compared with ===. Any other value (a class, a
# literal, nil, ...) is kept as-is and compared with its own ===.
module Request
  # Represents a hash pattern. Every key in the pattern has to match the
  # candidate; keys that are only in the candidate are ignored. A pattern
  # value of :any only requires the key to be present.
  class Shape
    attr_reader :values

    def initialize(values)
      @values = values
    end

    def ===(other)
      # A candidate that is not a hash (e.g. a nil result) is a mismatch, not
      # an error.
      return false unless other.is_a?(Hash)

      values.all? do |key, value|
        value == :any ? other.key?(key) : value === other[key]
      end
    end
  end

  # Represents an array pattern. Elements are compared by position. The
  # length is deliberately not compared, so a candidate with extra trailing
  # elements still matches.
  class Tuple
    attr_reader :values

    def initialize(values)
      @values = values
    end

    def ===(other)
      # A candidate that is not an array is a mismatch, not an error.
      return false unless other.is_a?(Array)

      values.each_with_index.all? { |value, index| value === other[index] }
    end
  end

  # Recursively builds a pattern out of the given value.
  def self.[](value)
    case value
    when Array
      Tuple.new(value.map { |child| self[child] })
    when Hash
      Shape.new(value.transform_values { |child| self[child] })
    else
      value
    end
  end
end
# The "initialize" request. Assigned directly from Struct.new rather than
# subclassing it, which avoids an extra anonymous class in the ancestry.
Initialize = Struct.new(:id) do
  # The message body, without the jsonrpc envelope.
  def to_hash
    { method: "initialize", id: id }
  end
end
# The "shutdown" request. Assigned directly from Struct.new rather than
# subclassing it, which avoids an extra anonymous class in the ancestry.
Shutdown = Struct.new(:id) do
  # The message body, without the jsonrpc envelope.
  def to_hash
    { method: "shutdown", id: id }
  end
end
# The "textDocument/didOpen" notification carrying a document's uri and its
# text. Assigned directly from Struct.new rather than subclassing it, which
# avoids an extra anonymous class in the ancestry.
TextDocumentDidOpen = Struct.new(:uri, :text) do
  # The message body, without the jsonrpc envelope.
  def to_hash
    {
      method: "textDocument/didOpen",
      params: { textDocument: { uri: uri, text: text } }
    }
  end
end
# The "textDocument/didChange" notification. The new text is sent as a single
# entry in contentChanges. Assigned directly from Struct.new rather than
# subclassing it, which avoids an extra anonymous class in the ancestry.
TextDocumentDidChange = Struct.new(:uri, :text) do
  # The message body, without the jsonrpc envelope.
  def to_hash
    {
      method: "textDocument/didChange",
      params: {
        textDocument: { uri: uri },
        contentChanges: [{ text: text }]
      }
    }
  end
end
# The "textDocument/didClose" notification for the given uri. Assigned
# directly from Struct.new rather than subclassing it, which avoids an extra
# anonymous class in the ancestry.
TextDocumentDidClose = Struct.new(:uri) do
  # The message body, without the jsonrpc envelope.
  def to_hash
    {
      method: "textDocument/didClose",
      params: { textDocument: { uri: uri } }
    }
  end
end
# The "textDocument/codeAction" request for the given uri, passing the given
# diagnostics as the request context. Assigned directly from Struct.new
# rather than subclassing it, which avoids an extra anonymous class in the
# ancestry.
TextDocumentCodeAction = Struct.new(:id, :uri, :diagnostics) do
  # The message body, without the jsonrpc envelope.
  def to_hash
    {
      method: "textDocument/codeAction",
      id: id,
      params: {
        textDocument: { uri: uri },
        context: {
          diagnostics: diagnostics,
        },
      },
    }
  end
end
# The "textDocument/diagnostic" request for the given uri. Assigned directly
# from Struct.new rather than subclassing it, which avoids an extra anonymous
# class in the ancestry.
TextDocumentDiagnostic = Struct.new(:id, :uri) do
  # The message body, without the jsonrpc envelope.
  def to_hash
    {
      method: "textDocument/diagnostic",
      id: id,
      params: {
        textDocument: { uri: uri },
      }
    }
  end
end
def test_reading_file
Tempfile.create(%w[test- .rb]) do |file|
file.write("class Foo; end")
file.rewind
responses = run_server([
Initialize.new(1),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
end
end
def test_clean_shutdown
responses = run_server([Initialize.new(1), Shutdown.new(2)])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 2, result: {} }
]]
assert_operator(shape, :===, responses)
end
def test_file_that_does_not_exist
responses = run_server([
Initialize.new(1),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
end
def test_code_action_request
message = "this is an error"
diagnostic = {
range: { start: { line: 0, character: 0 }, end: { line: 0, character: 0 } },
message: message,
severity: 1,
}
responses = run_server([
Initialize.new(1),
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
1 + (
RUBY
TextDocumentCodeAction.new(2, "file:///path/to/file.rb", [diagnostic]),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 2, result: [
{
title: "Report incorrect error: `#{message}`",
kind: "quickfix",
diagnostics: [diagnostic],
command: {
title: "Report incorrect error",
command: "vscode.open",
arguments: [String]
}
}
],
},
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
assert(responses.dig(1, :result, 0, :command, :arguments, 0).include?(URI.encode_www_form_component(message)))
end
def test_code_action_request_no_diagnostic
responses = run_server([
Initialize.new(1),
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
1 + (
RUBY
TextDocumentCodeAction.new(2, "file:///path/to/file.rb", []),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 2, result: [] },
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
end
def test_code_action_request_no_content
message = "this is an error"
diagnostic = {
range: { start: { line: 0, character: 0 }, end: { line: 0, character: 0 } },
message: message,
severity: 1,
}
responses = run_server([
Initialize.new(1),
TextDocumentCodeAction.new(2, "file:///path/to/file.rb", [diagnostic]),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 2, result: nil },
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
end
def test_diagnostics_request_error
responses = run_server([
Initialize.new(1),
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
1 + (
RUBY
TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 2, result: { kind: "full", items: [
{
range: {
start: { line: Integer, character: Integer },
end: { line: Integer, character: Integer }
},
message: String,
severity: Integer
},
] } },
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
assert(responses.dig(1, :result, :items).count { |item| item[:severity] == 1 } > 0)
end
def test_diagnostics_request_warning
responses = run_server([
Initialize.new(1),
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
a/b /c
RUBY
TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 2, result: { kind: "full", items: [
{
range: {
start: { line: Integer, character: Integer },
end: { line: Integer, character: Integer }
},
message: String,
severity: Integer
},
] } },
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
assert(responses.dig(1, :result, :items).count { |item| item[:severity] == 2 } > 0)
end
def test_diagnostics_request_nothing
responses = run_server([
Initialize.new(1),
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
a = 1
RUBY
TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 2, result: { kind: "full", items: [] } },
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
assert_equal(0, responses.dig(1, :result, :items).size)
end
def test_diagnostics_request_no_content
responses = run_server([
Initialize.new(1),
TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
Shutdown.new(3)
])
shape = Request[[
{ id: 1, result: { capabilities: Hash } },
{ id: 2, result: nil },
{ id: 3, result: {} }
]]
assert_operator(shape, :===, responses)
end
private
def write(content)
request = content.to_hash.merge(jsonrpc: "2.0").to_json
"Content-Length: #{request.bytesize}\r\n\r\n#{request}"
end
# Decodes every message found in the given IO. Each message is a header block
# terminated by a blank line, followed by as many bytes of JSON as the
# Content-Length header announces. Returns the parsed messages, with symbol
# keys, in the order they were read.
def read(content)
  messages = []

  until (headers = content.gets("\r\n\r\n")).nil?
    length = headers[/Content-Length: (\d+)/i, 1].to_i
    messages << JSON.parse(content.read(length), symbolize_names: true)
  end

  messages
end
# Feeds the given messages to a LanguageServer through an in-memory input
# stream, runs it until it returns, and gives back the decoded responses it
# wrote to its output stream.
def run_server(messages)
  payload = messages.map { |message| write(message) }.join
  input = StringIO.new(payload)
  output = StringIO.new

  server = LanguageServer.new(input: input, output: output)
  server.run

  # Rewind so that the responses are read from the start of the stream.
  output.rewind
  read(output)
end
end
end

View File

@ -3,10 +3,12 @@
require "yarp_test_helper"
class ParseTest < Test::Unit::TestCase
# Because we're reading the snapshots from disk, we need to make sure that
# they're encoded as UTF-8. When certain settings are present this might not
# always be the case (e.g., LANG=C or -Eascii-8bit). So here we force the
# default external encoding for the duration of the test.
# When we pretty-print the trees to compare against the snapshots, we want to
# be certain that we print with the same external encoding. This is because
# methods like Symbol#inspect take into account external encoding and it could
# change how the snapshot is generated. On machines with certain settings
# (like LANG=C or -Eascii-8bit) this could have been changed. So here we're
# going to force it to be UTF-8 to keep the snapshots consistent.
def setup
@previous_default_external = Encoding.default_external
ignore_warnings { Encoding.default_external = Encoding::UTF_8 }
@ -29,20 +31,6 @@ class ParseTest < Test::Unit::TestCase
seattlerb/pct_w_heredoc_interp_nested.txt
]
# Because the filepath in SourceFileNodes is different from one machine to the
# next, PP.pp(sexp, +"", 79) can have different results: both the path itself
# and the line breaks based on the length of the path. To compensate, every
# SourceFileNode is collapsed back onto a single line and the directory of
# this file is stripped out of the output.
def normalize_printed(printed)
  source_file_node =
    /SourceFileNode \s*
      \(\s* (\d+\.\.\.\d+) \s*\) \s*
      \(\s* ("[^"]*") \s*\)
    /mx

  collapsed = printed.gsub(source_file_node, 'SourceFileNode(\1)(\2)')
  collapsed.gsub(__dir__, "")
end
def find_source_file_node(node)
if node.is_a?(YARP::SourceFileNode)
node
@ -79,27 +67,26 @@ class ParseTest < Test::Unit::TestCase
# that is invalid Ruby.
refute_nil Ripper.sexp_raw(source)
# Next, parse the source and print the value.
result = YARP.parse_file(filepath)
value = result.value
printed = normalize_printed(PP.pp(value, +"", 79))
# Next, assert that there were no errors during parsing.
assert_empty result.errors, value
result = YARP.parse(source, relative)
assert_empty result.errors
# Next, pretty print the source.
printed = PP.pp(result.value, +"", 79)
if File.exist?(snapshot)
normalized = normalize_printed(File.read(snapshot))
saved = File.read(snapshot)
# If the snapshot file exists, but the printed value does not match the
# snapshot, then update the snapshot file.
if normalized != printed
File.write(snapshot, normalized)
if printed != saved
File.write(snapshot, printed)
warn("Updated snapshot at #{snapshot}.")
end
# If the snapshot file exists, then assert that the printed value
# matches the snapshot.
assert_equal(normalized, printed)
assert_equal(saved, printed)
else
# If the snapshot file does not yet exist, then write it out now.
File.write(snapshot, printed)
@ -108,11 +95,11 @@ class ParseTest < Test::Unit::TestCase
# Next, assert that the value can be serialized and deserialized without
# changing the shape of the tree.
assert_equal_nodes(value, YARP.load(source, YARP.dump(source, filepath)))
assert_equal_nodes(result.value, YARP.load(source, YARP.dump(source, relative)))
# Next, assert that the newlines are in the expected places.
expected_newlines = [0]
source.b.scan("\n") { expected_newlines << $~.offset(0)[0] }
source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
assert_equal expected_newlines, YARP.newlines(source)
# Finally, assert that we can lex the source and get the same tokens as

View File

@ -101,6 +101,10 @@ class RegexpTest < Test::Unit::TestCase
refute_nil(YARP.named_captures("(?#foo)"))
end
def test_comments_with_escaped_parentheses
refute_nil(YARP.named_captures("(?#foo\\)\\))"))
end
def test_non_capturing_groups
refute_nil(YARP.named_captures("(?:foo)"))
end

View File

@ -102,7 +102,7 @@ ProgramNode(0...185)(
StringNode(123...129)((123...125), (125...128), (128...129), "abc"),
DefNode(131...149)(
(144...145),
SourceFileNode(135...143)("/fixtures/keyword_method_names.txt"),
SourceFileNode(135...143)("keyword_method_names.txt"),
nil,
nil,
[],

View File

@ -5,7 +5,7 @@ ProgramNode(0...51)(
RetryNode(6...11)(),
SelfNode(13...17)(),
SourceEncodingNode(19...31)(),
SourceFileNode(33...41)("/fixtures/keywords.txt"),
SourceFileNode(33...41)("keywords.txt"),
SourceLineNode(43...51)()]
)
)

View File

@ -1,6 +1,6 @@
ProgramNode(0...125)(
ProgramNode(0...156)(
[],
StatementsNode(0...125)(
StatementsNode(0...156)(
[AndNode(0...19)(
CallNode(0...7)(
CallNode(4...7)(nil, nil, (4...7), nil, nil, nil, nil, 0, "foo"),
@ -146,6 +146,48 @@ ProgramNode(0...125)(
"!"
),
(108...111)
),
CallNode(127...138)(
CallNode(131...134)(
nil,
nil,
(131...134),
nil,
nil,
nil,
nil,
0,
"foo"
),
nil,
(127...130),
(130...131),
nil,
(137...138),
nil,
0,
"!"
),
CallNode(140...156)(
CallNode(147...150)(
nil,
nil,
(147...150),
nil,
nil,
nil,
nil,
0,
"foo"
),
nil,
(140...143),
(143...144),
nil,
(155...156),
nil,
0,
"!"
)]
)
)

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@ ProgramNode(0...38)(
[],
StatementsNode(0...38)(
[SourceEncodingNode(0...12)(),
SourceFileNode(13...21)("/fixtures/unparser/corpus/literal/pragma.txt"),
SourceFileNode(13...21)("unparser/corpus/literal/pragma.txt"),
SourceLineNode(22...30)(),
CallNode(31...38)(nil, nil, (31...38), nil, nil, nil, nil, 0, "__dir__")]
)

View File

@ -3,7 +3,9 @@ ProgramNode(8...111)(
StatementsNode(8...111)(
[CaseNode(8...111)(
ArrayNode(13...51)(
[SourceFileNode(14...22)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"),
[SourceFileNode(14...22)(
"whitequark/pattern_matching__FILE__LINE_literals.txt"
),
CallNode(24...36)(
SourceLineNode(24...32)(),
nil,
@ -22,7 +24,9 @@ ProgramNode(8...111)(
[InNode(62...99)(
ArrayPatternNode(65...99)(
nil,
[SourceFileNode(66...74)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"),
[SourceFileNode(66...74)(
"whitequark/pattern_matching__FILE__LINE_literals.txt"
),
SourceLineNode(76...84)(),
SourceEncodingNode(86...98)()],
nil,

View File

@ -1,6 +1,6 @@
ProgramNode(0...8)(
[],
StatementsNode(0...8)(
[SourceFileNode(0...8)("/fixtures/whitequark/string___FILE__.txt")]
[SourceFileNode(0...8)("whitequark/string___FILE__.txt")]
)
)

File diff suppressed because it is too large Load Diff

View File

@ -9,14 +9,13 @@
#define YARP_AST_H
#include "yarp/defines.h"
#include "yarp/util/yp_constant_pool.h"
#include "yarp/util/yp_string.h"
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include "yarp/util/yp_constant_pool.h"
#include "yarp/util/yp_string.h"
// This enum represents every type of token in the Ruby source.
typedef enum yp_token_type {
YP_TOKEN_EOF = 1, // final token in the file

View File

@ -1,826 +0,0 @@
#include "yarp/extension.h"
typedef enum {
YP_ISEQ_TYPE_TOP,
YP_ISEQ_TYPE_BLOCK
} yp_iseq_type_t;
typedef enum {
YP_RUBY_EVENT_B_CALL,
YP_RUBY_EVENT_B_RETURN
} yp_ruby_event_t;
typedef struct yp_iseq_compiler {
// This is the parent compiler. It is used to communicate between ISEQs that
// need to be able to jump back to the parent ISEQ.
struct yp_iseq_compiler *parent;
// This is the list of local variables that are defined on this scope.
yp_constant_id_list_t *locals;
// This is the instruction sequence that we are compiling. It's actually just
// a Ruby array that maps to the output of RubyVM::InstructionSequence#to_a.
VALUE insns;
// This is a list of IDs coming from the instructions that are being compiled.
// In theory they should be deterministic, but we don't have that
// functionality yet. Fortunately you can pass -1 for all of them and
// everything for the most part continues to work.
VALUE node_ids;
// This is the current size of the instruction sequence's stack.
int stack_size;
// This is the maximum size of the instruction sequence's stack.
int stack_max;
// This is the name of the instruction sequence.
const char *name;
// This is the type of the instruction sequence.
yp_iseq_type_t type;
// This is the optional argument information.
VALUE optionals;
// This is the number of arguments.
int arg_size;
// This is the current size of the instruction sequence's instructions and
// operands.
size_t size;
// This is the index of the current inline storage.
size_t inline_storage_index;
} yp_iseq_compiler_t;
static void
yp_iseq_compiler_init(yp_iseq_compiler_t *compiler, yp_iseq_compiler_t *parent, yp_constant_id_list_t *locals, const char *name, yp_iseq_type_t type) {
*compiler = (yp_iseq_compiler_t) {
.parent = parent,
.locals = locals,
.insns = rb_ary_new(),
.node_ids = rb_ary_new(),
.stack_size = 0,
.stack_max = 0,
.name = name,
.type = type,
.optionals = rb_hash_new(),
.arg_size = 0,
.size = 0,
.inline_storage_index = 0
};
}
/******************************************************************************/
/* Utilities */
/******************************************************************************/
static inline int
sizet2int(size_t value) {
if (value > INT_MAX) rb_raise(rb_eRuntimeError, "value too large");
return (int) value;
}
// Look up a local variable by constant id in the scope that sits `depth`
// parents above the given compiler. Returns the value derived from the
// variable's position in that scope's list of locals (size - position + 2),
// or -1 if the scope does not define it.
static int
local_index(yp_iseq_compiler_t *compiler, yp_constant_id_t constant_id, int depth) {
    // Walk up the chain of parent compilers to reach the requested scope.
    yp_iseq_compiler_t *scope = compiler;
    for (int remaining = depth; remaining > 0; remaining--) {
        scope = scope->parent;
        assert(scope != NULL);
    }

    yp_constant_id_list_t *locals = scope->locals;
    for (size_t index = 0; index < locals->size; index++) {
        if (locals->ids[index] == constant_id) {
            return sizet2int(locals->size - index + 2);
        }
    }

    return -1;
}
/******************************************************************************/
/* Parse specific VALUEs from strings */
/******************************************************************************/
// Parse the integer spelled by the bytes in [start, end) into a Ruby Integer.
//
// The bytes are copied into a Ruby string rather than an alloca'd buffer, so
// the amount of stack used no longer grows with the length of the literal.
// The base (-10) and the disabled bad-input check are the same as before.
static VALUE
parse_number(const char *start, const char *end) {
    VALUE string = rb_str_new(start, (long) (end - start));
    return rb_str_to_inum(string, -10, Qfalse);
}
static inline VALUE
parse_string(yp_string_t *string) {
return rb_str_new(yp_string_source(string), yp_string_length(string));
}
static inline ID
parse_symbol(const char *start, const char *end) {
return rb_intern2(start, end - start);
}
static inline ID
parse_location_symbol(yp_location_t *location) {
return parse_symbol(location->start, location->end);
}
static inline ID
parse_node_symbol(yp_node_t *node) {
return parse_symbol(node->location.start, node->location.end);
}
static inline ID
parse_string_symbol(yp_string_t *string) {
const char *start = yp_string_source(string);
return parse_symbol(start, start + yp_string_length(string));
}
/******************************************************************************/
/* Create Ruby objects for compilation */
/******************************************************************************/
// Build the Ruby array that describes the compiled instruction sequence, laid
// out like the output of RubyVM::InstructionSequence#to_a. Fields that the
// compiler does not track yet (code location, local size, node id, file
// paths, line number, locals, catch table) are filled with fixed
// placeholders.
static VALUE
yp_iseq_new(yp_iseq_compiler_t *compiler) {
    VALUE code_location = rb_ary_new_capa(4);
    rb_ary_push(code_location, INT2FIX(1));
    rb_ary_push(code_location, INT2FIX(0));
    rb_ary_push(code_location, INT2FIX(1));
    rb_ary_push(code_location, INT2FIX(0));

    VALUE data = rb_hash_new();
    rb_hash_aset(data, ID2SYM(rb_intern("arg_size")), INT2FIX(compiler->arg_size));
    rb_hash_aset(data, ID2SYM(rb_intern("local_size")), INT2FIX(0));
    rb_hash_aset(data, ID2SYM(rb_intern("stack_max")), INT2FIX(compiler->stack_max));
    rb_hash_aset(data, ID2SYM(rb_intern("node_id")), INT2FIX(-1));
    rb_hash_aset(data, ID2SYM(rb_intern("code_location")), code_location);
    rb_hash_aset(data, ID2SYM(rb_intern("node_ids")), compiler->node_ids);

    VALUE type = Qnil;
    switch (compiler->type) {
        case YP_ISEQ_TYPE_TOP:
            type = ID2SYM(rb_intern("top"));
            break;
        case YP_ISEQ_TYPE_BLOCK:
            type = ID2SYM(rb_intern("block"));
            break;
    }

    // Fourteen entries are pushed below, so reserve room for all of them up
    // front instead of growing the array on the last push.
    VALUE iseq = rb_ary_new_capa(14);
    rb_ary_push(iseq, rb_str_new_cstr("YARVInstructionSequence/SimpleDataFormat"));

    // Hard-coded version triple.
    rb_ary_push(iseq, INT2FIX(3));
    rb_ary_push(iseq, INT2FIX(3));
    rb_ary_push(iseq, INT2FIX(1));

    rb_ary_push(iseq, data);
    rb_ary_push(iseq, rb_str_new_cstr(compiler->name));

    // Placeholder relative path, absolute path, and line number.
    rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));
    rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));
    rb_ary_push(iseq, INT2FIX(1));

    rb_ary_push(iseq, type);

    // Empty list of locals, then the argument options, then an empty catch
    // table, then the instructions themselves.
    rb_ary_push(iseq, rb_ary_new());
    rb_ary_push(iseq, compiler->optionals);
    rb_ary_push(iseq, rb_ary_new());
    rb_ary_push(iseq, compiler->insns);

    return iseq;
}
// static const int YP_CALLDATA_ARGS_SPLAT = 1 << 0;
// static const int YP_CALLDATA_ARGS_BLOCKARG = 1 << 1;
static const int YP_CALLDATA_FCALL = 1 << 2;
static const int YP_CALLDATA_VCALL = 1 << 3;
static const int YP_CALLDATA_ARGS_SIMPLE = 1 << 4;
// static const int YP_CALLDATA_BLOCKISEQ = 1 << 5;
// static const int YP_CALLDATA_KWARG = 1 << 6;
// static const int YP_CALLDATA_KW_SPLAT = 1 << 7;
// static const int YP_CALLDATA_TAILCALL = 1 << 8;
// static const int YP_CALLDATA_SUPER = 1 << 9;
// static const int YP_CALLDATA_ZSUPER = 1 << 10;
// static const int YP_CALLDATA_OPT_SEND = 1 << 11;
// static const int YP_CALLDATA_KW_SPLAT_MUT = 1 << 12;
static VALUE
yp_calldata_new(ID mid, int flag, size_t orig_argc) {
VALUE calldata = rb_hash_new();
rb_hash_aset(calldata, ID2SYM(rb_intern("mid")), ID2SYM(mid));
rb_hash_aset(calldata, ID2SYM(rb_intern("flag")), INT2FIX(flag));
rb_hash_aset(calldata, ID2SYM(rb_intern("orig_argc")), INT2FIX(orig_argc));
return calldata;
}
static inline VALUE
yp_inline_storage_new(yp_iseq_compiler_t *compiler) {
return INT2FIX(compiler->inline_storage_index++);
}
/******************************************************************************/
/* Push instructions onto a compiler */
/******************************************************************************/
// Append one instruction to the compiler and return the Ruby array that
// represents it.
//
// stack_change is the net effect of the instruction on the stack; it is added
// to the running stack size, and the maximum stack size is raised if needed.
// size is the number of VALUEs passed as variadic arguments (the instruction
// name followed by its operands); it is also added to the compiler's running
// instruction size. A -1 placeholder is recorded in node_ids for every
// instruction pushed.
static VALUE
push_insn(yp_iseq_compiler_t *compiler, int stack_change, size_t size, ...) {
    VALUE insn = rb_ary_new_capa(size);

    va_list opnds;
    va_start(opnds, size);
    for (size_t remaining = size; remaining > 0; remaining--) {
        rb_ary_push(insn, va_arg(opnds, VALUE));
    }
    va_end(opnds);

    int stack_size = compiler->stack_size + stack_change;
    compiler->stack_size = stack_size;
    if (compiler->stack_max < stack_size) {
        compiler->stack_max = stack_size;
    }

    compiler->size += size;
    rb_ary_push(compiler->insns, insn);
    rb_ary_push(compiler->node_ids, INT2FIX(-1));

    return insn;
}
static VALUE
push_label(yp_iseq_compiler_t *compiler) {
VALUE label = ID2SYM(rb_intern_str(rb_sprintf("label_%zu", compiler->size)));
rb_ary_push(compiler->insns, label);
return label;
}
// Append the symbol naming the given event to the compiler's instruction
// list. Events are not instructions: they do not touch the stack size, the
// instruction size, or node_ids. An event value outside of the enum pushes
// nothing.
static void
push_ruby_event(yp_iseq_compiler_t *compiler, yp_ruby_event_t event) {
    const char *name;

    switch (event) {
        case YP_RUBY_EVENT_B_CALL:
            name = "RUBY_EVENT_B_CALL";
            break;
        case YP_RUBY_EVENT_B_RETURN:
            name = "RUBY_EVENT_B_RETURN";
            break;
        default:
            return;
    }

    rb_ary_push(compiler->insns, ID2SYM(rb_intern(name)));
}
static inline VALUE
push_anytostring(yp_iseq_compiler_t *compiler) {
return push_insn(compiler, -2 + 1, 1, ID2SYM(rb_intern("anytostring")));
}
static inline VALUE
push_branchif(yp_iseq_compiler_t *compiler, VALUE label) {
return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("branchif")), label);
}
static inline VALUE
push_branchunless(yp_iseq_compiler_t *compiler, VALUE label) {
return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("branchunless")), label);
}
static inline VALUE
push_concatstrings(yp_iseq_compiler_t *compiler, int count) {
return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("concatstrings")), INT2FIX(count));
}
static inline VALUE
push_dup(yp_iseq_compiler_t *compiler) {
return push_insn(compiler, -1 + 2, 1, ID2SYM(rb_intern("dup")));
}
static inline VALUE
push_getclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getclassvariable")), name, inline_storage);
}
static inline VALUE
push_getconstant(yp_iseq_compiler_t *compiler, VALUE name) {
return push_insn(compiler, -2 + 1, 2, ID2SYM(rb_intern("getconstant")), name);
}
static inline VALUE
push_getglobal(yp_iseq_compiler_t *compiler, VALUE name) {
return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("getglobal")), name);
}
static inline VALUE
push_getinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getinstancevariable")), name, inline_storage);
}
static inline VALUE
push_getlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getlocal")), index, depth);
}
static inline VALUE
push_leave(yp_iseq_compiler_t *compiler) {
return push_insn(compiler, -1 + 0, 1, ID2SYM(rb_intern("leave")));
}
static inline VALUE
push_newarray(yp_iseq_compiler_t *compiler, int count) {
return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("newarray")), INT2FIX(count));
}
static inline VALUE
push_newhash(yp_iseq_compiler_t *compiler, int count) {
return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("newhash")), INT2FIX(count));
}
static inline VALUE
push_newrange(yp_iseq_compiler_t *compiler, VALUE flag) {
return push_insn(compiler, -2 + 1, 2, ID2SYM(rb_intern("newrange")), flag);
}
// Push a nop instruction. nop neither pops nor pushes any values, so its net
// effect on the stack is zero (written in the same -pops + pushes form as the
// other push_* helpers).
static inline VALUE
push_nop(yp_iseq_compiler_t *compiler) {
    return push_insn(compiler, -0 + 0, 1, ID2SYM(rb_intern("nop")));
}
static inline VALUE
push_objtostring(yp_iseq_compiler_t *compiler, VALUE calldata) {
return push_insn(compiler, -1 + 1, 2, ID2SYM(rb_intern("objtostring")), calldata);
}
static inline VALUE
push_pop(yp_iseq_compiler_t *compiler) {
return push_insn(compiler, -1 + 0, 1, ID2SYM(rb_intern("pop")));
}
static inline VALUE
push_putnil(yp_iseq_compiler_t *compiler) {
return push_insn(compiler, -0 + 1, 1, ID2SYM(rb_intern("putnil")));
}
static inline VALUE
push_putobject(yp_iseq_compiler_t *compiler, VALUE value) {
return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putobject")), value);
}
static inline VALUE
push_putself(yp_iseq_compiler_t *compiler) {
return push_insn(compiler, -0 + 1, 1, ID2SYM(rb_intern("putself")));
}
static inline VALUE
push_setlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setlocal")), index, depth);
}
static const VALUE YP_SPECIALOBJECT_VMCORE = INT2FIX(1);
static const VALUE YP_SPECIALOBJECT_CBASE = INT2FIX(2);
// static const VALUE YP_SPECIALOBJECT_CONST_BASE = INT2FIX(3);
static inline VALUE
push_putspecialobject(yp_iseq_compiler_t *compiler, VALUE object) {
return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putspecialobject")), object);
}
static inline VALUE
push_putstring(yp_iseq_compiler_t *compiler, VALUE string) {
return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putstring")), string);
}
static inline VALUE
push_send(yp_iseq_compiler_t *compiler, int stack_change, VALUE calldata, VALUE block_iseq) {
return push_insn(compiler, stack_change, 3, ID2SYM(rb_intern("send")), calldata, block_iseq);
}
static inline VALUE
push_setclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setclassvariable")), name, inline_storage);
}
static inline VALUE
push_setglobal(yp_iseq_compiler_t *compiler, VALUE name) {
return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("setglobal")), name);
}
static inline VALUE
push_setinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setinstancevariable")), name, inline_storage);
}
/******************************************************************************/
/* Compile an AST node using the given compiler */
/******************************************************************************/
// Compile a single AST node by appending instructions to the given compiler.
// Child nodes are compiled recursively, so the instructions for a node's
// operands are emitted before the instruction that consumes them.
//
// Every expression-like node is compiled so that it leaves exactly one value
// on the stack; statement lists pop the intermediate values so that only the
// last one remains.
//
// Raises NotImplementedError (rb_eNotImpError) for node types that are not
// handled yet, and for write nodes that have no value.
static void
yp_compile_node(yp_iseq_compiler_t *compiler, yp_node_t *base_node) {
    switch (base_node->type) {
        case YP_NODE_ALIAS_NODE: {
            yp_alias_node_t *node = (yp_alias_node_t *) base_node;

            // Four values are pushed (vmcore, cbase and the two names) and the
            // send is recorded with a net stack change of -3.
            push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
            push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
            yp_compile_node(compiler, node->new_name);
            yp_compile_node(compiler, node->old_name);
            push_send(compiler, -3, yp_calldata_new(rb_intern("core#set_method_alias"), YP_CALLDATA_ARGS_SIMPLE, 3), Qnil);
            return;
        }
        case YP_NODE_AND_NODE: {
            yp_and_node_t *node = (yp_and_node_t *) base_node;

            // Evaluate the left side and keep a copy of it. If the copy is
            // falsy we jump past the right side and the left value is the
            // result, otherwise it is popped and replaced by the right side.
            yp_compile_node(compiler, node->left);
            push_dup(compiler);
            VALUE branchunless = push_branchunless(compiler, Qnil);

            push_pop(compiler);
            yp_compile_node(compiler, node->right);

            // The branch target is only known now, so patch it in.
            VALUE label = push_label(compiler);
            rb_ary_store(branchunless, 1, label);
            return;
        }
        case YP_NODE_ARGUMENTS_NODE: {
            yp_arguments_node_t *node = (yp_arguments_node_t *) base_node;
            yp_node_list_t node_list = node->arguments;

            // Each argument is compiled in order, leaving its value on the
            // stack for the call that follows.
            for (size_t index = 0; index < node_list.size; index++) {
                yp_compile_node(compiler, node_list.nodes[index]);
            }
            return;
        }
        case YP_NODE_ARRAY_NODE: {
            yp_array_node_t *node = (yp_array_node_t *) base_node;
            yp_node_list_t elements = node->elements;

            for (size_t index = 0; index < elements.size; index++) {
                yp_compile_node(compiler, elements.nodes[index]);
            }

            push_newarray(compiler, sizet2int(elements.size));
            return;
        }
        case YP_NODE_ASSOC_NODE: {
            yp_assoc_node_t *node = (yp_assoc_node_t *) base_node;

            // Leaves the key and then the value on the stack; the enclosing
            // hash node emits the instruction that consumes them.
            yp_compile_node(compiler, node->key);
            yp_compile_node(compiler, node->value);
            return;
        }
        case YP_NODE_BLOCK_NODE: {
            yp_block_node_t *node = (yp_block_node_t *) base_node;
            VALUE optional_labels = rb_ary_new();

            if (node->parameters &&
                node->parameters->parameters &&
                node->parameters->parameters->optionals.size > 0) {
                compiler->arg_size += node->parameters->parameters->optionals.size;

                // Emit one label ahead of each optional parameter's default
                // value, plus a final label after the last one. The list of
                // labels is stored under :opt in the compiler's optionals.
                yp_node_list_t *optionals = &node->parameters->parameters->optionals;
                for (size_t i = 0; i < optionals->size; i++) {
                    VALUE label = push_label(compiler);
                    rb_ary_push(optional_labels, label);
                    yp_compile_node(compiler, optionals->nodes[i]);
                }

                VALUE label = push_label(compiler);
                rb_ary_push(optional_labels, label);
                rb_hash_aset(compiler->optionals, ID2SYM(rb_intern("opt")), optional_labels);

                push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
                push_nop(compiler);
            } else {
                push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
            }

            // A block without a body evaluates to nil.
            if (node->statements) {
                yp_compile_node(compiler, node->statements);
            } else {
                push_putnil(compiler);
            }

            push_ruby_event(compiler, YP_RUBY_EVENT_B_RETURN);
            push_leave(compiler);
            return;
        }
        case YP_NODE_CALL_NODE: {
            yp_call_node_t *node = (yp_call_node_t *) base_node;
            ID mid = parse_location_symbol(&node->message_loc);
            int flags = 0;
            size_t orig_argc = 0;

            // A call without an explicit receiver is sent to self.
            if (node->receiver == NULL) {
                push_putself(compiler);
            } else {
                yp_compile_node(compiler, node->receiver);
            }

            if (node->arguments != NULL) {
                yp_arguments_node_t *arguments = node->arguments;
                yp_compile_node(compiler, (yp_node_t *) arguments);
                orig_argc = arguments->arguments.size;
            }

            // The block, if any, is compiled into its own instruction
            // sequence using a child compiler.
            VALUE block_iseq = Qnil;
            if (node->block != NULL) {
                yp_iseq_compiler_t block_compiler;
                yp_iseq_compiler_init(
                    &block_compiler,
                    compiler,
                    &node->block->locals,
                    "block in <compiled>",
                    YP_ISEQ_TYPE_BLOCK
                );

                yp_compile_node(&block_compiler, (yp_node_t *) node->block);
                block_iseq = yp_iseq_new(&block_compiler);
            }

            // Calls without a block are flagged as having simple arguments.
            if (block_iseq == Qnil) {
                flags |= YP_CALLDATA_ARGS_SIMPLE;
            }

            // Receiver-less calls are function calls; when they additionally
            // have neither arguments nor a block they are also flagged VCALL.
            if (node->receiver == NULL) {
                flags |= YP_CALLDATA_FCALL;

                if (block_iseq == Qnil && node->arguments == NULL) {
                    flags |= YP_CALLDATA_VCALL;
                }
            }

            // The receiver and the arguments were pushed above; the recorded
            // net stack change is the negated argument count.
            push_send(compiler, -sizet2int(orig_argc), yp_calldata_new(mid, flags, orig_argc), block_iseq);
            return;
        }
        case YP_NODE_CLASS_VARIABLE_READ_NODE: {
            yp_class_variable_read_node_t *node = (yp_class_variable_read_node_t *) base_node;
            push_getclassvariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node)), yp_inline_storage_new(compiler));
            return;
        }
        case YP_NODE_CLASS_VARIABLE_WRITE_NODE: {
            yp_class_variable_write_node_t *node = (yp_class_variable_write_node_t *) base_node;

            if (node->value == NULL) {
                rb_raise(rb_eNotImpError, "class variable write without value not implemented");
            }

            // The value is duplicated so that one copy is consumed by the
            // write and the other remains as the result of the expression.
            yp_compile_node(compiler, node->value);
            push_dup(compiler);
            push_setclassvariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
            return;
        }
        case YP_NODE_CONSTANT_PATH_NODE: {
            yp_constant_path_node_t *node = (yp_constant_path_node_t *) base_node;

            // Compile the parent, then look the child's name up with a false
            // operand pushed ahead of getconstant.
            yp_compile_node(compiler, node->parent);
            push_putobject(compiler, Qfalse);
            push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node->child)));
            return;
        }
        case YP_NODE_CONSTANT_READ_NODE:
            // A bare constant is looked up with nil and true pushed ahead of
            // getconstant.
            push_putnil(compiler);
            push_putobject(compiler, Qtrue);
            push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)));
            return;
        case YP_NODE_EMBEDDED_STATEMENTS_NODE: {
            yp_embedded_statements_node_t *node = (yp_embedded_statements_node_t *) base_node;
            yp_compile_node(compiler, (yp_node_t *) node->statements);
            return;
        }
        case YP_NODE_FALSE_NODE:
            push_putobject(compiler, Qfalse);
            return;
        case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
            push_getglobal(compiler, ID2SYM(parse_location_symbol(&base_node->location)));
            return;
        case YP_NODE_GLOBAL_VARIABLE_WRITE_NODE: {
            yp_global_variable_write_node_t *node = (yp_global_variable_write_node_t *) base_node;

            if (node->value == NULL) {
                rb_raise(rb_eNotImpError, "global variable write without value not implemented");
            }

            // Duplicate the value so the assignment still yields it.
            yp_compile_node(compiler, node->value);
            push_dup(compiler);
            push_setglobal(compiler, ID2SYM(parse_location_symbol(&node->name_loc)));
            return;
        }
        case YP_NODE_HASH_NODE: {
            yp_hash_node_t *node = (yp_hash_node_t *) base_node;
            yp_node_list_t elements = node->elements;

            for (size_t index = 0; index < elements.size; index++) {
                yp_compile_node(compiler, elements.nodes[index]);
            }

            // Each element is counted as two stack values (a key and a value).
            push_newhash(compiler, sizet2int(elements.size * 2));
            return;
        }
        case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
            push_getinstancevariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)), yp_inline_storage_new(compiler));
            return;
        case YP_NODE_INSTANCE_VARIABLE_WRITE_NODE: {
            yp_instance_variable_write_node_t *node = (yp_instance_variable_write_node_t *) base_node;

            if (node->value == NULL) {
                rb_raise(rb_eNotImpError, "instance variable write without value not implemented");
            }

            // Duplicate the value so the assignment still yields it.
            yp_compile_node(compiler, node->value);
            push_dup(compiler);
            push_setinstancevariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
            return;
        }
        case YP_NODE_INTEGER_NODE:
            push_putobject(compiler, parse_number(base_node->location.start, base_node->location.end));
            return;
        case YP_NODE_INTERPOLATED_STRING_NODE: {
            yp_interpolated_string_node_t *node = (yp_interpolated_string_node_t *) base_node;

            for (size_t index = 0; index < node->parts.size; index++) {
                yp_node_t *part = node->parts.nodes[index];

                switch (part->type) {
                    case YP_NODE_STRING_NODE: {
                        // Literal parts are pushed as-is.
                        yp_string_node_t *string_node = (yp_string_node_t *) part;
                        push_putobject(compiler, parse_string(&string_node->unescaped));
                        break;
                    }
                    default:
                        // Any other part is evaluated and converted to a
                        // string through objtostring/anytostring.
                        yp_compile_node(compiler, part);
                        push_dup(compiler);
                        push_objtostring(compiler, yp_calldata_new(rb_intern("to_s"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 0));
                        push_anytostring(compiler);
                        break;
                }
            }

            push_concatstrings(compiler, sizet2int(node->parts.size));
            return;
        }
        case YP_NODE_KEYWORD_HASH_NODE: {
            yp_keyword_hash_node_t *node = (yp_keyword_hash_node_t *) base_node;
            yp_node_list_t elements = node->elements;

            for (size_t index = 0; index < elements.size; index++) {
                yp_compile_node(compiler, elements.nodes[index]);
            }

            // Each element is counted as two stack values (a key and a value).
            push_newhash(compiler, sizet2int(elements.size * 2));
            return;
        }
        case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
            yp_local_variable_read_node_t *node = (yp_local_variable_read_node_t *) base_node;
            int index = local_index(compiler, node->constant_id, node->depth);

            push_getlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
            return;
        }
        case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
            yp_local_variable_write_node_t *node = (yp_local_variable_write_node_t *) base_node;

            if (node->value == NULL) {
                rb_raise(rb_eNotImpError, "local variable write without value not implemented");
            }

            int index = local_index(compiler, node->constant_id, node->depth);

            // Duplicate the value so the assignment still yields it.
            yp_compile_node(compiler, node->value);
            push_dup(compiler);
            push_setlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
            return;
        }
        case YP_NODE_NIL_NODE:
            push_putnil(compiler);
            return;
        case YP_NODE_OR_NODE: {
            yp_or_node_t *node = (yp_or_node_t *) base_node;

            // Mirror image of the and node: if the copy of the left value is
            // truthy we jump past the right side, otherwise it is popped and
            // replaced by the right side.
            yp_compile_node(compiler, node->left);
            push_dup(compiler);
            VALUE branchif = push_branchif(compiler, Qnil);

            push_pop(compiler);
            yp_compile_node(compiler, node->right);

            // The branch target is only known now, so patch it in.
            VALUE label = push_label(compiler);
            rb_ary_store(branchif, 1, label);
            return;
        }
        case YP_NODE_PARENTHESES_NODE: {
            yp_parentheses_node_t *node = (yp_parentheses_node_t *) base_node;

            // Empty parentheses evaluate to nil.
            if (node->statements == NULL) {
                push_putnil(compiler);
            } else {
                yp_compile_node(compiler, node->statements);
            }
            return;
        }
        case YP_NODE_PROGRAM_NODE: {
            yp_program_node_t *node = (yp_program_node_t *) base_node;

            // An empty program evaluates to nil.
            if (node->statements->body.size == 0) {
                push_putnil(compiler);
            } else {
                yp_compile_node(compiler, (yp_node_t *) node->statements);
            }

            push_leave(compiler);
            return;
        }
        case YP_NODE_RANGE_NODE: {
            yp_range_node_t *node = (yp_range_node_t *) base_node;

            // A missing bound is compiled as nil.
            if (node->left == NULL) {
                push_putnil(compiler);
            } else {
                yp_compile_node(compiler, node->left);
            }

            if (node->right == NULL) {
                push_putnil(compiler);
            } else {
                yp_compile_node(compiler, node->right);
            }

            // The operand is 1 when the operator is three bytes long (`...`)
            // and 0 otherwise (`..`).
            push_newrange(compiler, INT2FIX((node->operator_loc.end - node->operator_loc.start) == 3));
            return;
        }
        case YP_NODE_SELF_NODE:
            push_putself(compiler);
            return;
        case YP_NODE_STATEMENTS_NODE: {
            yp_statements_node_t *node = (yp_statements_node_t *) base_node;
            yp_node_list_t node_list = node->body;

            // Discard the value of every statement except the last one, which
            // becomes the value of the whole list.
            for (size_t index = 0; index < node_list.size; index++) {
                yp_compile_node(compiler, node_list.nodes[index]);
                if (index < node_list.size - 1) push_pop(compiler);
            }
            return;
        }
        case YP_NODE_STRING_NODE: {
            yp_string_node_t *node = (yp_string_node_t *) base_node;
            push_putstring(compiler, parse_string(&node->unescaped));
            return;
        }
        case YP_NODE_SYMBOL_NODE: {
            yp_symbol_node_t *node = (yp_symbol_node_t *) base_node;
            push_putobject(compiler, ID2SYM(parse_string_symbol(&node->unescaped)));
            return;
        }
        case YP_NODE_TRUE_NODE:
            push_putobject(compiler, Qtrue);
            return;
        case YP_NODE_UNDEF_NODE: {
            yp_undef_node_t *node = (yp_undef_node_t *) base_node;

            // One core#undef_method send is emitted per name. The result of
            // every send except the last one is popped.
            for (size_t index = 0; index < node->names.size; index++) {
                push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
                push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
                yp_compile_node(compiler, node->names.nodes[index]);
                push_send(compiler, -2, yp_calldata_new(rb_intern("core#undef_method"), YP_CALLDATA_ARGS_SIMPLE, 2), Qnil);

                if (index < node->names.size - 1) push_pop(compiler);
            }
            return;
        }
        case YP_NODE_X_STRING_NODE: {
            yp_x_string_node_t *node = (yp_x_string_node_t *) base_node;

            // Compiled as a call to the ` method on self with the command
            // string as its only argument.
            push_putself(compiler);
            push_putobject(compiler, parse_string(&node->unescaped));
            push_send(compiler, -1, yp_calldata_new(rb_intern("`"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 1), Qnil);
            return;
        }
        case YP_NODE_OPTIONAL_PARAMETER_NODE: {
            yp_optional_parameter_node_t *node = (yp_optional_parameter_node_t *) base_node;

            // The default value is assigned to the parameter's local at depth
            // 0. The value is not duplicated, so nothing is left on the stack.
            int depth = 0;
            int index = local_index(compiler, node->constant_id, depth);

            yp_compile_node(compiler, node->value);
            push_setlocal(compiler, INT2FIX(index), INT2FIX(depth));
            return;
        }
        default:
            // The cast keeps the argument type in line with the %d conversion.
            rb_raise(rb_eNotImpError, "node type %d not implemented", (int) base_node->type);
            return;
    }
}
// Entry point of the compiler. The given node must be a program node; it is
// compiled with a fresh top-level compiler (no parent) that uses the program's
// own locals, and the result of yp_iseq_new for that compiler is returned.
VALUE
yp_compile(yp_node_t *node) {
    assert(node->type == YP_NODE_PROGRAM_NODE);
    yp_program_node_t *program = (yp_program_node_t *) node;

    yp_iseq_compiler_t compiler;
    yp_iseq_compiler_init(&compiler, NULL, &program->locals, "<compiled>", YP_ISEQ_TYPE_TOP);

    yp_compile_node(&compiler, node);
    return yp_iseq_new(&compiler);
}

1
yarp/config.h Normal file
View File

@ -0,0 +1 @@
#include "ruby/config.h"

View File

@ -1,8 +1,20 @@
#ifndef YARP_DEFINES_H
#define YARP_DEFINES_H
// This file should be included first by any *.h or *.c in YARP
#include "yarp/config.h"
#include <ctype.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
// YP_EXPORTED_FUNCTION
#if defined(_WIN32)
#if defined(YP_STATIC)
# define YP_EXPORTED_FUNCTION
#elif defined(_WIN32)
# define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
#else
# ifndef YP_EXPORTED_FUNCTION
@ -16,9 +28,9 @@
// YP_ATTRIBUTE_UNUSED
#if defined(__GNUC__)
# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
#else
# define YP_ATTRIBUTE_UNUSED
# define YP_ATTRIBUTE_UNUSED
#endif
// inline
@ -26,4 +38,13 @@
# define inline __inline
#endif
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
#if defined(HAVE_SNPRINTF)
// We use snprintf if it's available
# define yp_snprintf snprintf
#endif
#endif

View File

@ -2,12 +2,11 @@
#define YARP_DIAGNOSTIC_H
#include "yarp/defines.h"
#include "yarp/util/yp_list.h"
#include <stdbool.h>
#include <stdlib.h>
#include "yarp/util/yp_list.h"
// This struct represents a diagnostic found during parsing.
typedef struct {
yp_list_node_t node;

View File

@ -51,7 +51,8 @@ yp_encoding_t yp_encoding_ascii = {
.char_width = yp_encoding_ascii_char_width,
.alnum_char = yp_encoding_ascii_alnum_char,
.alpha_char = yp_encoding_ascii_alpha_char,
.isupper_char = yp_encoding_ascii_isupper_char
.isupper_char = yp_encoding_ascii_isupper_char,
.multibyte = false
};
yp_encoding_t yp_encoding_ascii_8bit = {
@ -60,4 +61,5 @@ yp_encoding_t yp_encoding_ascii_8bit = {
.alnum_char = yp_encoding_ascii_alnum_char,
.alpha_char = yp_encoding_ascii_alpha_char,
.isupper_char = yp_encoding_ascii_isupper_char,
.multibyte = false
};

View File

@ -74,5 +74,6 @@ yp_encoding_t yp_encoding_big5 = {
.char_width = yp_encoding_big5_char_width,
.alnum_char = yp_encoding_big5_alnum_char,
.alpha_char = yp_encoding_big5_alpha_char,
.isupper_char = yp_encoding_big5_isupper_char
.isupper_char = yp_encoding_big5_isupper_char,
.multibyte = true
};

View File

@ -12,11 +12,28 @@
// Each callback should return the number of bytes, or 0 if the next bytes are
// invalid for the encoding and type.
typedef struct {
const char *name;
// Return the number of bytes that the next character takes if it is valid
// in the encoding.
size_t (*char_width)(const char *c);
// Return the number of bytes that the next character takes if it is valid
// in the encoding and is alphabetical.
size_t (*alpha_char)(const char *c);
// Return the number of bytes that the next character takes if it is valid
// in the encoding and is alphanumeric.
size_t (*alnum_char)(const char *c);
// Return true if the next character is valid in the encoding and is an
// uppercase character.
bool (*isupper_char)(const char *c);
// The name of the encoding. This should correspond to a value that can be
// passed to Encoding.find in Ruby.
const char *name;
// Return true if the encoding is a multibyte encoding.
bool multibyte;
} yp_encoding_t;
// These bits define the location of each bit of metadata within the various

View File

@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_euc_jp = {
.char_width = yp_encoding_euc_jp_char_width,
.alnum_char = yp_encoding_euc_jp_alnum_char,
.alpha_char = yp_encoding_euc_jp_alpha_char,
.isupper_char = yp_encoding_euc_jp_isupper_char
.isupper_char = yp_encoding_euc_jp_isupper_char,
.multibyte = true
};

View File

@ -80,5 +80,6 @@ yp_encoding_t yp_encoding_gbk = {
.char_width = yp_encoding_gbk_char_width,
.alnum_char = yp_encoding_gbk_alnum_char,
.alpha_char = yp_encoding_gbk_alpha_char,
.isupper_char = yp_encoding_gbk_isupper_char
.isupper_char = yp_encoding_gbk_isupper_char,
.multibyte = true
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_1 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_1_alnum_char,
.alpha_char = yp_encoding_iso_8859_1_alpha_char,
.isupper_char = yp_encoding_iso_8859_1_isupper_char
.isupper_char = yp_encoding_iso_8859_1_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_10 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_10_alnum_char,
.alpha_char = yp_encoding_iso_8859_10_alpha_char,
.isupper_char = yp_encoding_iso_8859_10_isupper_char
.isupper_char = yp_encoding_iso_8859_10_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_11 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_11_alnum_char,
.alpha_char = yp_encoding_iso_8859_11_alpha_char,
.isupper_char = yp_encoding_iso_8859_11_isupper_char
.isupper_char = yp_encoding_iso_8859_11_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_13 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_13_alnum_char,
.alpha_char = yp_encoding_iso_8859_13_alpha_char,
.isupper_char = yp_encoding_iso_8859_13_isupper_char
.isupper_char = yp_encoding_iso_8859_13_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_14 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_14_alnum_char,
.alpha_char = yp_encoding_iso_8859_14_alpha_char,
.isupper_char = yp_encoding_iso_8859_14_isupper_char
.isupper_char = yp_encoding_iso_8859_14_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_15 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_15_alnum_char,
.alpha_char = yp_encoding_iso_8859_15_alpha_char,
.isupper_char = yp_encoding_iso_8859_15_isupper_char
.isupper_char = yp_encoding_iso_8859_15_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_16 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_16_alnum_char,
.alpha_char = yp_encoding_iso_8859_16_alpha_char,
.isupper_char = yp_encoding_iso_8859_16_isupper_char
.isupper_char = yp_encoding_iso_8859_16_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_2 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_2_alnum_char,
.alpha_char = yp_encoding_iso_8859_2_alpha_char,
.isupper_char = yp_encoding_iso_8859_2_isupper_char
.isupper_char = yp_encoding_iso_8859_2_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_3 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_3_alnum_char,
.alpha_char = yp_encoding_iso_8859_3_alpha_char,
.isupper_char = yp_encoding_iso_8859_3_isupper_char
.isupper_char = yp_encoding_iso_8859_3_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_4 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_4_alnum_char,
.alpha_char = yp_encoding_iso_8859_4_alpha_char,
.isupper_char = yp_encoding_iso_8859_4_isupper_char
.isupper_char = yp_encoding_iso_8859_4_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_5 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_5_alnum_char,
.alpha_char = yp_encoding_iso_8859_5_alpha_char,
.isupper_char = yp_encoding_iso_8859_5_isupper_char
.isupper_char = yp_encoding_iso_8859_5_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_6 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_6_alnum_char,
.alpha_char = yp_encoding_iso_8859_6_alpha_char,
.isupper_char = yp_encoding_iso_8859_6_isupper_char
.isupper_char = yp_encoding_iso_8859_6_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_7 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_7_alnum_char,
.alpha_char = yp_encoding_iso_8859_7_alpha_char,
.isupper_char = yp_encoding_iso_8859_7_isupper_char
.isupper_char = yp_encoding_iso_8859_7_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_8 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_8_alnum_char,
.alpha_char = yp_encoding_iso_8859_8_alpha_char,
.isupper_char = yp_encoding_iso_8859_8_isupper_char
.isupper_char = yp_encoding_iso_8859_8_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_9 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_9_alnum_char,
.alpha_char = yp_encoding_iso_8859_9_alpha_char,
.isupper_char = yp_encoding_iso_8859_9_isupper_char
.isupper_char = yp_encoding_iso_8859_9_isupper_char,
.multibyte = false
};

View File

@ -51,5 +51,6 @@ yp_encoding_t yp_encoding_koi8_r = {
.char_width = yp_encoding_koi8_r_char_width,
.alnum_char = yp_encoding_koi8_r_alnum_char,
.alpha_char = yp_encoding_koi8_r_alpha_char,
.isupper_char = yp_encoding_koi8_r_isupper_char
.isupper_char = yp_encoding_koi8_r_isupper_char,
.multibyte = false
};

View File

@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_shift_jis = {
.char_width = yp_encoding_shift_jis_char_width,
.alnum_char = yp_encoding_shift_jis_alnum_char,
.alpha_char = yp_encoding_shift_jis_alpha_char,
.isupper_char = yp_encoding_shift_jis_isupper_char
.isupper_char = yp_encoding_shift_jis_isupper_char,
.multibyte = true
};

View File

@ -2230,7 +2230,7 @@ utf_8_codepoint(const unsigned char *c, size_t *width) {
codepoint = (state != 0) ?
(byte & 0x3fu) | (codepoint << 6) :
(0xff >> type) & (byte);
(0xffu >> type) & (byte);
state = utf_8_dfa[256 + (state * 16) + type];
if (!state) {
@ -2312,5 +2312,6 @@ yp_encoding_t yp_encoding_utf_8 = {
.char_width = yp_encoding_utf_8_char_width,
.alnum_char = yp_encoding_utf_8_alnum_char,
.alpha_char = yp_encoding_utf_8_alpha_char,
.isupper_char = yp_encoding_utf_8_isupper_char
.isupper_char = yp_encoding_utf_8_isupper_char,
.multibyte = true
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1251 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_windows_1251_alnum_char,
.alpha_char = yp_encoding_windows_1251_alpha_char,
.isupper_char = yp_encoding_windows_1251_isupper_char
.isupper_char = yp_encoding_windows_1251_isupper_char,
.multibyte = false
};

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1252 = {
.char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_windows_1252_alnum_char,
.alpha_char = yp_encoding_windows_1252_alpha_char,
.isupper_char = yp_encoding_windows_1252_isupper_char
.isupper_char = yp_encoding_windows_1252_isupper_char,
.multibyte = false
};

View File

@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_windows_31j = {
.char_width = yp_encoding_windows_31j_char_width,
.alnum_char = yp_encoding_windows_31j_alnum_char,
.alpha_char = yp_encoding_windows_31j_alpha_char,
.isupper_char = yp_encoding_windows_31j_isupper_char
.isupper_char = yp_encoding_windows_31j_isupper_char,
.multibyte = true
};

View File

@ -1,6 +1,7 @@
#include "yarp/extension.h"
VALUE rb_cYARP;
VALUE rb_cYARPSource;
VALUE rb_cYARPToken;
VALUE rb_cYARPLocation;
@ -9,51 +10,97 @@ VALUE rb_cYARPParseError;
VALUE rb_cYARPParseWarning;
VALUE rb_cYARPParseResult;
// Represents a source of Ruby code. It can either be coming from a file or a
// string. If it's a file, it's going to mmap the contents of the file. If it's
// a string it's going to just point to the contents of the string.
/******************************************************************************/
/* IO of Ruby code */
/******************************************************************************/
// Represents an input of Ruby code. It can either be coming from a file or a
// string. If it's a file, we'll use demand paging to read the contents of the
// file into a string. If it's already a string, we'll reference it directly.
typedef struct {
enum { SOURCE_FILE, SOURCE_STRING } type;
const char *source;
size_t size;
} source_t;
} input_t;
// Check if the given filepath is a string. If it's nil, then return NULL. If
// it's not a string, then raise a type error. Otherwise return the filepath as
// a C string.
static const char *
check_filepath(VALUE filepath) {
// If the filepath is nil, then we don't need to do anything.
if (NIL_P(filepath)) {
return NULL;
}
// Check if the filepath is a string. If it's not, then raise a type error.
if (!RB_TYPE_P(filepath, T_STRING)) {
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
}
// Otherwise, return the filepath as a C string.
return StringValueCStr(filepath);
}
// Read the file indicated by the filepath parameter into source and load its
// contents and size into the given source_t.
// contents and size into the given input_t.
//
// We want to use demand paging as much as possible in order to avoid having to
// read the entire file into memory (which could be detrimental to performance
// for large files). This means that if we're on windows we'll use
// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
// `mmap`, and on other POSIX systems we'll use `read`.
static int
source_file_load(source_t *source, VALUE filepath) {
input_load_filepath(input_t *input, const char *filepath) {
#ifdef _WIN32
HANDLE file = CreateFile(
StringValueCStr(filepath),
GENERIC_READ,
0,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL
);
// Open the file for reading.
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (file == INVALID_HANDLE_VALUE) {
perror("Invalid handle for file");
perror("CreateFile failed");
return 1;
}
// Get the file size.
DWORD file_size = GetFileSize(file, NULL);
source->source = malloc(file_size);
if (file_size == INVALID_FILE_SIZE) {
CloseHandle(file);
perror("GetFileSize failed");
return 1;
}
DWORD bytes_read;
BOOL success = ReadFile(file, DISCARD_CONST_QUAL(void *, source->source), file_size, &bytes_read, NULL);
// If the file is empty, then we don't need to do anything else, we'll set
// the source to a constant empty string and return.
if (!file_size) {
CloseHandle(file);
input->size = 0;
input->source = "";
return 0;
}
// Create a mapping of the file.
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
if (mapping == NULL) {
CloseHandle(file);
perror("CreateFileMapping failed");
return 1;
}
// Map the file into memory.
input->source = (const char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
CloseHandle(mapping);
CloseHandle(file);
if (!success) {
perror("ReadFile failed");
if (input->source == NULL) {
perror("MapViewOfFile failed");
return 1;
}
source->size = (size_t) file_size;
// Set the size of the source.
input->size = (size_t) file_size;
return 0;
#else
// Open the file for reading
int fd = open(StringValueCStr(filepath), O_RDONLY);
int fd = open(filepath, O_RDONLY);
if (fd == -1) {
perror("open");
return 1;
@ -68,30 +115,30 @@ source_file_load(source_t *source, VALUE filepath) {
}
// mmap the file descriptor to virtually get the contents
source->size = sb.st_size;
input->size = sb.st_size;
#ifdef HAVE_MMAP
if (!source->size) {
if (!input->size) {
close(fd);
source->source = "";
input->source = "";
return 0;
}
char * res = mmap(NULL, source->size, PROT_READ, MAP_PRIVATE, fd, 0);
if (res == MAP_FAILED) {
const char *result = mmap(NULL, input->size, PROT_READ, MAP_PRIVATE, fd, 0);
if (result == MAP_FAILED) {
perror("Map failed");
return 1;
} else {
source->source = res;
input->source = result;
}
#else
source->source = malloc(source->size);
if (source->source == NULL) return 1;
input->source = malloc(input->size);
if (input->source == NULL) return 1;
ssize_t read_size = read(fd, (void *)source->source, source->size);
if (read_size < 0 || (size_t)read_size != source->size) {
ssize_t read_size = read(fd, (void *) input->source, input->size);
if (read_size < 0 || (size_t)read_size != input->size) {
perror("Read size is incorrect");
free((void *)source->source);
free((void *) input->source);
return 1;
}
#endif
@ -101,86 +148,106 @@ source_file_load(source_t *source, VALUE filepath) {
#endif
}
// Load the contents and size of the given string into the given source_t.
// Load the contents and size of the given string into the given input_t.
static void
source_string_load(source_t *source, VALUE string) {
*source = (source_t) {
.type = SOURCE_STRING,
.source = RSTRING_PTR(string),
.size = RSTRING_LEN(string),
};
input_load_string(input_t *input, VALUE string) {
// Check that the given value is a string. If it's not, then raise a type error.
if (!RB_TYPE_P(string, T_STRING)) {
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
}
input->source = RSTRING_PTR(string);
input->size = RSTRING_LEN(string);
}
// Free any resources associated with the given source_t.
// Free any resources associated with the given input_t. This is the counterpart
// to input_load_filepath. It will unmap the file if it was mapped, or free the
// memory if it was allocated.
static void
source_file_unload(source_t *source) {
#ifdef _WIN32
free((void *)source->source);
input_unload_filepath(input_t *input) {
// We don't need to free anything with 0 sized files because we handle that
// with a constant string instead.
if (!input->size) return;
void *memory = (void *) input->source;
#if defined(_WIN32)
UnmapViewOfFile(memory);
#elif defined(HAVE_MMAP)
munmap(memory, input->size);
#else
#ifdef HAVE_MMAP
munmap((void *)source->source, source->size);
#else
free((void *)source->source);
#endif
free(memory);
#endif
}
// Dump the AST corresponding to the given source to a string.
/******************************************************************************/
/* Serializing the AST */
/******************************************************************************/
// Dump the AST corresponding to the given input to a string.
static VALUE
dump_source(source_t *source, const char *filepath) {
dump_input(input_t *input, const char *filepath) {
yp_buffer_t buffer;
if (!yp_buffer_init(&buffer)) {
rb_raise(rb_eNoMemError, "failed to allocate memory");
}
yp_parser_t parser;
yp_parser_init(&parser, source->source, source->size, filepath);
yp_parser_init(&parser, input->source, input->size, filepath);
yp_node_t *node = yp_parse(&parser);
yp_buffer_t buffer;
if (!yp_buffer_init(&buffer)) rb_raise(rb_eNoMemError, "failed to allocate memory");
yp_serialize(&parser, node, &buffer);
VALUE dumped = rb_str_new(buffer.value, buffer.length);
VALUE result = rb_str_new(buffer.value, buffer.length);
yp_node_destroy(&parser, node);
yp_buffer_free(&buffer);
yp_parser_free(&parser);
return dumped;
return result;
}
// Dump the AST corresponding to the given string to a string.
static VALUE
dump(VALUE self, VALUE string, VALUE filepath) {
source_t source;
source_string_load(&source, string);
char *str = NULL;
dump(int argc, VALUE *argv, VALUE self) {
VALUE string;
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
if (filepath != Qnil) {
str = StringValueCStr(filepath);
}
return dump_source(&source, str);
input_t input;
input_load_string(&input, string);
return dump_input(&input, check_filepath(filepath));
}
// Dump the AST corresponding to the given file to a string.
static VALUE
dump_file(VALUE self, VALUE filepath) {
source_t source;
if (source_file_load(&source, filepath) != 0) return Qnil;
input_t input;
const char *checked = check_filepath(filepath);
if (input_load_filepath(&input, checked) != 0) return Qnil;
VALUE value = dump_input(&input, checked);
input_unload_filepath(&input);
VALUE value = dump_source(&source, StringValueCStr(filepath));
source_file_unload(&source);
return value;
}
/******************************************************************************/
/* Extracting values for the parse result */
/******************************************************************************/
// Extract the comments out of the parser into an array.
static VALUE
parser_comments(yp_parser_t *parser) {
parser_comments(yp_parser_t *parser, VALUE source) {
VALUE comments = rb_ary_new();
yp_comment_t *comment;
for (comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
VALUE location_argv[] = { LONG2FIX(comment->start - parser->start), LONG2FIX(comment->end - parser->start) };
for (yp_comment_t *comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(comment->start - parser->start),
LONG2FIX(comment->end - parser->start)
};
VALUE type;
switch (comment->type) {
case YP_COMMENT_INLINE:
type = ID2SYM(rb_intern("inline"));
@ -196,7 +263,7 @@ parser_comments(yp_parser_t *parser) {
break;
}
VALUE comment_argv[] = { type, rb_class_new_instance(2, location_argv, rb_cYARPLocation) };
VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cYARPLocation) };
rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment));
}
@ -205,19 +272,20 @@ parser_comments(yp_parser_t *parser) {
// Extract the errors out of the parser into an array.
static VALUE
parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
VALUE errors = rb_ary_new();
yp_diagnostic_t *error;
for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(error->start - parser->start),
LONG2FIX(error->end - parser->start)
};
VALUE error_argv[] = {
rb_enc_str_new_cstr(error->message, encoding),
rb_class_new_instance(2, location_argv, rb_cYARPLocation)
rb_class_new_instance(3, location_argv, rb_cYARPLocation)
};
rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError));
@ -228,19 +296,20 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
// Extract the warnings out of the parser into an array.
static VALUE
parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
VALUE warnings = rb_ary_new();
yp_diagnostic_t *warning;
for (warning = (yp_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (yp_diagnostic_t *) warning->node.next) {
VALUE location_argv[] = {
source,
LONG2FIX(warning->start - parser->start),
LONG2FIX(warning->end - parser->start)
};
VALUE warning_argv[] = {
rb_enc_str_new_cstr(warning->message, encoding),
rb_class_new_instance(2, location_argv, rb_cYARPLocation)
rb_class_new_instance(3, location_argv, rb_cYARPLocation)
};
rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning));
@ -249,22 +318,36 @@ parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
return warnings;
}
/******************************************************************************/
/* Lexing Ruby code */
/******************************************************************************/
// This struct gets stored in the parser and passed in to the lex callback any
// time a new token is found. We use it to store the necessary information to
// initialize a Token instance.
typedef struct {
// Source object that lex_token forwards to yp_token_new for each token.
VALUE source;
// Ruby array that lex_token appends one entry to for every token found.
VALUE tokens;
// Encoding that lex_token forwards to yp_token_new when building a token.
rb_encoding *encoding;
} lex_data_t;
// This is passed as a callback to the parser. It gets called every time a new
// token is found. Once found, we initialize a new instance of Token and push it
// onto the tokens array.
static void
lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
VALUE yields = rb_ary_new_capa(2);
rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding));
rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
rb_ary_push(yields, INT2FIX(parser->lex_state));
rb_ary_push(lex_data->tokens, yields);
}
// This is called whenever the encoding changes based on the magic comment at
// the top of the file. We use it to update the encoding that we are using to
// create tokens.
static void
lex_encoding_changed_callback(yp_parser_t *parser) {
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
@ -273,30 +356,42 @@ lex_encoding_changed_callback(yp_parser_t *parser) {
// Return an array of tokens corresponding to the given source.
static VALUE
lex_source(source_t *source, char *filepath) {
lex_input(input_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, source->source, source->size, filepath);
yp_parser_init(&parser, input->source, input->size, filepath);
yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
VALUE offsets = rb_ary_new();
VALUE source_argv[] = { rb_str_new(input->source, input->size), offsets };
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
lex_data_t lex_data = {
.source = source,
.tokens = rb_ary_new(),
.encoding = rb_utf8_encoding()
};
void *data = (void *) &lex_data;
lex_data_t *data = &lex_data;
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
.data = data,
.data = (void *) data,
.callback = lex_token,
};
parser.lex_callback = &lex_callback;
yp_node_t *node = yp_parse(&parser);
// Here we need to update the source range to have the correct newline
// offsets. We do it here because we've already created the object and given
// it over to all of the tokens.
for (size_t index = 0; index < parser.newline_list.size; index++) {
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
}
VALUE result_argv[] = {
lex_data.tokens,
parser_comments(&parser),
parser_errors(&parser, lex_data.encoding),
parser_warnings(&parser, lex_data.encoding)
parser_comments(&parser, source),
parser_errors(&parser, lex_data.encoding, source),
parser_warnings(&parser, lex_data.encoding, source)
};
VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
@ -309,40 +404,49 @@ lex_source(source_t *source, char *filepath) {
// Return an array of tokens corresponding to the given string.
static VALUE
lex(VALUE self, VALUE string, VALUE filepath) {
source_t source;
source_string_load(&source, string);
char *filepath_char = NULL;
if (filepath) {
filepath_char = StringValueCStr(filepath);
}
return lex_source(&source, filepath_char);
lex(int argc, VALUE *argv, VALUE self) {
VALUE string;
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
input_t input;
input_load_string(&input, string);
return lex_input(&input, check_filepath(filepath));
}
// Return an array of tokens corresponding to the given file.
static VALUE
lex_file(VALUE self, VALUE filepath) {
source_t source;
if (source_file_load(&source, filepath) != 0) return Qnil;
input_t input;
const char *checked = check_filepath(filepath);
if (input_load_filepath(&input, checked) != 0) return Qnil;
VALUE value = lex_input(&input, checked);
input_unload_filepath(&input);
VALUE value = lex_source(&source, StringValueCStr(filepath));
source_file_unload(&source);
return value;
}
/******************************************************************************/
/* Parsing Ruby code */
/******************************************************************************/
// Parse the given input and return a ParseResult instance.
static VALUE
parse_source(source_t *source, char *filepath) {
parse_input(input_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, source->source, source->size, filepath);
yp_parser_init(&parser, input->source, input->size, filepath);
yp_node_t *node = yp_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
VALUE source = yp_source_new(&parser);
VALUE result_argv[] = {
yp_ast_new(&parser, node, encoding),
parser_comments(&parser),
parser_errors(&parser, encoding),
parser_warnings(&parser, encoding)
parser_comments(&parser, source),
parser_errors(&parser, encoding, source),
parser_warnings(&parser, encoding, source)
};
VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
@ -353,40 +457,58 @@ parse_source(source_t *source, char *filepath) {
return result;
}
// Parse the given string and return a ParseResult instance.
static VALUE
parse(VALUE self, VALUE string, VALUE filepath) {
source_t source;
source_string_load(&source, string);
parse(int argc, VALUE *argv, VALUE self) {
VALUE string;
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
input_t input;
input_load_string(&input, string);
#ifdef YARP_DEBUG_MODE_BUILD
char* dup = malloc(source.size);
memcpy(dup, source.source, source.size);
source.source = dup;
char* dup = malloc(input.size);
memcpy(dup, input.source, input.size);
input.source = dup;
#endif
VALUE value = parse_source(&source, NIL_P(filepath) ? NULL : StringValueCStr(filepath));
VALUE value = parse_input(&input, check_filepath(filepath));
#ifdef YARP_DEBUG_MODE_BUILD
free(dup);
#endif
return value;
}
// Parse the given file and return a ParseResult instance.
static VALUE
parse_file(VALUE self, VALUE rb_filepath) {
source_t source;
if (source_file_load(&source, rb_filepath) != 0) {
return Qnil;
}
parse_file(VALUE self, VALUE filepath) {
input_t input;
const char *checked = check_filepath(filepath);
if (input_load_filepath(&input, checked) != 0) return Qnil;
VALUE value = parse_input(&input, checked);
input_unload_filepath(&input);
VALUE value = parse_source(&source, StringValueCStr(rb_filepath));
source_file_unload(&source);
return value;
}
/******************************************************************************/
/* Utility functions exposed to make testing easier */
/******************************************************************************/
// Returns an array of strings corresponding to the named capture groups in the
// given source string. If YARP was unable to parse the regular expression, this
// function returns nil.
static VALUE
named_captures(VALUE self, VALUE rb_source) {
named_captures(VALUE self, VALUE source) {
yp_string_list_t string_list;
yp_string_list_init(&string_list);
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(rb_source), RSTRING_LEN(rb_source), &string_list)) {
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) {
yp_string_list_free(&string_list);
return Qnil;
}
@ -401,6 +523,8 @@ named_captures(VALUE self, VALUE rb_source) {
return names;
}
// Accepts a source string and a type of unescaping and returns the unescaped
// version.
static VALUE
unescape(VALUE source, yp_unescape_type_t unescape_type) {
yp_string_t string;
@ -409,7 +533,13 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
yp_list_t error_list;
yp_list_init(&error_list);
yp_unescape_manipulate_string(RSTRING_PTR(source), RSTRING_LEN(source), &string, unescape_type, &error_list);
const char *start = RSTRING_PTR(source);
size_t length = RSTRING_LEN(source);
yp_parser_t parser;
yp_parser_init(&parser, start, length, "");
yp_unescape_manipulate_string(&parser, start, length, &string, unescape_type, &error_list);
if (yp_list_empty_p(&error_list)) {
result = rb_str_new(yp_string_source(&string), yp_string_length(&string));
} else {
@ -418,27 +548,32 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
yp_string_free(&string);
yp_list_free(&error_list);
yp_parser_free(&parser);
return result;
}
// Run the shared unescape helper over the given string in YP_UNESCAPE_NONE
// mode and return whatever that helper returns. This entry point exists so
// that every unescape mode is reachable through the same kind of method.
static VALUE
unescape_none(VALUE self, VALUE source) {
    (void) self;

    const yp_unescape_type_t unescape_type = YP_UNESCAPE_NONE;
    return unescape(source, unescape_type);
}
// Run the shared unescape helper over the given string in
// YP_UNESCAPE_MINIMAL mode and return whatever that helper returns.
static VALUE
unescape_minimal(VALUE self, VALUE source) {
    (void) self;

    const yp_unescape_type_t unescape_type = YP_UNESCAPE_MINIMAL;
    return unescape(source, unescape_type);
}
// Run the shared unescape helper over the given string in YP_UNESCAPE_ALL
// mode and return whatever that helper returns.
static VALUE
unescape_all(VALUE self, VALUE source) {
    (void) self;

    const yp_unescape_type_t unescape_type = YP_UNESCAPE_ALL;
    return unescape(source, unescape_type);
}
// This function returns a hash of information about the given source string's
// memory usage.
// Return a hash of information about the given source string's memory usage.
static VALUE
memsize(VALUE self, VALUE string) {
yp_parser_t parser;
@ -459,28 +594,17 @@ memsize(VALUE self, VALUE string) {
return result;
}
// Parse the given Ruby string (with no associated filepath) and pass the
// resulting syntax tree to yp_compile, returning whatever yp_compile
// produces.
static VALUE
compile(VALUE self, VALUE string) {
    (void) self;

    const char *code = RSTRING_PTR(string);
    size_t code_length = RSTRING_LEN(string);

    yp_parser_t parser;
    yp_parser_init(&parser, code, code_length, NULL);

    yp_node_t *root = yp_parse(&parser);
    VALUE compiled = yp_compile(root);

    // The tree and the parser are released only after yp_compile has
    // finished with the tree.
    yp_node_destroy(&parser, root);
    yp_parser_free(&parser);

    return compiled;
}
// Parse the file, but do nothing with the result. This is used to profile the
// parser for memory and speed.
static VALUE
profile_file(VALUE self, VALUE filepath) {
source_t source;
if (source_file_load(&source, filepath) != 0) return Qnil;
input_t input;
const char *checked = check_filepath(filepath);
if (input_load_filepath(&input, checked) != 0) return Qnil;
yp_parser_t parser;
yp_parser_init(&parser, source.source, source.size, StringValueCStr(filepath));
yp_parser_init(&parser, input.source, input.size, checked);
yp_node_t *node = yp_parse(&parser);
yp_node_destroy(&parser, node);
@ -491,9 +615,8 @@ profile_file(VALUE self, VALUE filepath) {
// The function takes a source string and returns a Ruby array containing the
// offsets of every newline in the string. (It also includes a 0 at the
// beginning to indicate the position of the first line.)
//
// It accepts a string as its only argument and returns an array of integers.
// beginning to indicate the position of the first line.) It accepts a string as
// its only argument and returns an array of integers.
static VALUE
newlines(VALUE self, VALUE string) {
yp_parser_t parser;
@ -512,46 +635,56 @@ newlines(VALUE self, VALUE string) {
return result;
}
/******************************************************************************/
/* Initialization of the extension */
/******************************************************************************/
RUBY_FUNC_EXPORTED void
Init_yarp(void) {
// Make sure that the YARP library version matches the expected version.
// Otherwise something was compiled incorrectly.
if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) {
rb_raise(rb_eRuntimeError, "The YARP library version (%s) does not match the expected version (%s)", yp_version(),
EXPECTED_YARP_VERSION);
rb_raise(
rb_eRuntimeError,
"The YARP library version (%s) does not match the expected version (%s)",
yp_version(),
EXPECTED_YARP_VERSION
);
}
// Grab up references to all of the constants that we're going to need to
// reference throughout this extension.
rb_cYARP = rb_define_module("YARP");
rb_cYARPSource = rb_define_class_under(rb_cYARP, "Source", rb_cObject);
rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject);
rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject);
rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject);
rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject);
rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject);
rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject);
rb_define_const(rb_cYARP, "VERSION", rb_sprintf("%d.%d.%d", YP_VERSION_MAJOR, YP_VERSION_MINOR, YP_VERSION_PATCH));
// Define the version string here so that we can use the constants defined
// in yarp.h.
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
rb_define_singleton_method(rb_cYARP, "dump", dump, 2);
// First, the functions that have to do with lexing and parsing.
rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
rb_define_singleton_method(rb_cYARP, "lex", lex, 2);
rb_define_singleton_method(rb_cYARP, "lex", lex, -1);
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
rb_define_singleton_method(rb_cYARP, "_parse", parse, 2);
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
// Next, the functions that will be called by the parser to perform various
// internal tasks. We expose these to make them easier to test.
rb_define_singleton_method(rb_cYARP, "named_captures", named_captures, 1);
rb_define_singleton_method(rb_cYARP, "unescape_none", unescape_none, 1);
rb_define_singleton_method(rb_cYARP, "unescape_minimal", unescape_minimal, 1);
rb_define_singleton_method(rb_cYARP, "unescape_all", unescape_all, 1);
rb_define_singleton_method(rb_cYARP, "memsize", memsize, 1);
rb_define_singleton_method(rb_cYARP, "compile", compile, 1);
rb_define_singleton_method(rb_cYARP, "profile_file", profile_file, 1);
rb_define_singleton_method(rb_cYARP, "newlines", newlines, 1);
// Next, initialize the pack API.
Init_yarp_pack();
}

View File

@ -5,11 +5,11 @@
#include <ruby/encoding.h>
#include "yarp.h"
#include <fcntl.h>
// The following headers are necessary to read files using demand paging.
#ifdef _WIN32
#include <windows.h>
#else
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
@ -17,16 +17,11 @@
#define EXPECTED_YARP_VERSION "0.4.0"
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding);
VALUE yp_source_new(yp_parser_t *parser);
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
VALUE yp_compile(yp_node_t *node);
void Init_yarp_pack(void);
YP_EXPORTED_FUNCTION void Init_yarp(void);
#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
#endif // YARP_EXT_NODE_H
#endif

View File

@ -1,20 +0,0 @@
#ifndef YARP_MISSING_H
#define YARP_MISSING_H
#include "yarp/defines.h"
#include <ctype.h>
#include <stddef.h>
#include <string.h>
// NOTE(review): the implementation is not visible from this header. Judging
// by the name and signature this presumably searches for needle within the
// first length bytes of haystack (in the style of BSD strnstr) — confirm
// against the definition.
const char * yp_strnstr(const char *haystack, const char *needle, size_t length);
// NOTE(review): presumably a length-bounded, case-insensitive comparison in
// the style of POSIX strncasecmp — confirm against the definition.
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
// When the build has not defined HAVE_STRNCASECMP and strncasecmp is not
// already a macro, redirect uses of strncasecmp to yp_strncasecmp.
#ifndef HAVE_STRNCASECMP
#ifndef strncasecmp
#define strncasecmp yp_strncasecmp
#endif
#endif
#endif

View File

@ -2,8 +2,6 @@
#define YARP_NODE_H
#include "yarp/defines.h"
#include "yarp.h"
#include "yarp/parser.h"
// Append a token to the given list.
@ -15,6 +13,20 @@ void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
// Clear the node but preserves the location.
void yp_node_clear(yp_node_t *node);
// Deallocate a node and all of its children.
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
// This struct stores the information gathered by the yp_node_memsize function.
// It contains both the memory footprint and additional metadata about the
// shape of the tree.
typedef struct {
// The memory footprint gathered for the tree (presumably in bytes — TODO
// confirm against yp_node_memsize).
size_t memsize;
// NOTE(review): presumably the number of nodes counted while walking the
// tree — confirm against yp_node_memsize.
size_t node_count;
} yp_memsize_t;
// Calculates the memory footprint of a given node.
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
#define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
#define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })

View File

@ -3,8 +3,8 @@
#include "yarp/defines.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdlib.h>
typedef enum yp_pack_version {
YP_PACK_VERSION_3_2_0

View File

@ -1,17 +1,16 @@
#ifndef YARP_PARSER_H
#define YARP_PARSER_H
#include "yarp/defines.h"
#include <stdbool.h>
#include "yarp/ast.h"
#include "yarp/defines.h"
#include "yarp/enc/yp_encoding.h"
#include "yarp/util/yp_constant_pool.h"
#include "yarp/util/yp_list.h"
#include "yarp/util/yp_newline_list.h"
#include "yarp/util/yp_state_stack.h"
#include <stdbool.h>
// This enum provides various bits that represent different kinds of states that
// the lexer can track. This is used to determine which kind of token to return
// based on the context of the parser.

View File

@ -5,6 +5,8 @@
/* if you are looking to modify the */
/* template */
/******************************************************************************/
#include "yarp/defines.h"
#include <stdio.h>
#include "yarp/ast.h"
@ -14,7 +16,7 @@
static void
prettyprint_location(yp_buffer_t *buffer, yp_parser_t *parser, yp_location_t *location) {
char printed[] = "[0000-0000]";
sprintf(printed, "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
yp_snprintf(printed, sizeof(printed), "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
yp_buffer_append_str(buffer, printed, strlen(printed));
}
@ -189,7 +191,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_block_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_block_node_t *)node)->locals.ids[index]);
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_block_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
}
yp_buffer_append_str(buffer, ", ", 2); if (((yp_block_node_t *)node)->parameters == NULL) {
@ -291,7 +293,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_node_t *)node)->block);
}
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
sprintf(flags_buffer, "+%d", ((yp_call_node_t *)node)->flags);
yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_call_node_t *)node)->flags);
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
yp_buffer_append_str(buffer, ", ", 2); yp_buffer_append_str(buffer, "\"", 1);
yp_buffer_append_str(buffer, yp_string_source(&((yp_call_node_t *)node)->name), yp_string_length(&((yp_call_node_t *)node)->name));
@ -321,7 +323,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_call_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_id_buffer[12];
sprintf(operator_id_buffer, "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -360,7 +362,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_class_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_class_node_t *)node)->locals.ids[index]);
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_class_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
}
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_class_node_t *)node)->class_keyword_loc);
@ -406,7 +408,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_class_variable_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_class_variable_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -454,7 +456,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_constant_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_constant_operator_write_node_t *)node)->operator);
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -493,7 +495,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_constant_path_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_path_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -540,7 +542,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); for (uint32_t index = 0; index < ((yp_def_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_def_node_t *)node)->locals.ids[index]);
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_def_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
}
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_def_node_t *)node)->def_keyword_loc);
@ -734,7 +736,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_global_variable_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_global_variable_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -871,7 +873,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_instance_variable_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_instance_variable_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -911,7 +913,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
}
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_interpolated_regular_expression_node_t *)node)->closing_loc);
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
sprintf(flags_buffer, "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -1001,7 +1003,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_lambda_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
}
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_lambda_node_t *)node)->opening_loc);
@ -1024,7 +1026,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_and_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_and_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -1035,7 +1037,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_or_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_or_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -1046,10 +1048,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ", ", 2); char operator_id_buffer[12];
sprintf(operator_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -1057,10 +1059,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
yp_buffer_append_str(buffer, "LocalVariableReadNode(", 22);
char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ", ", 2); char depth_buffer[12];
sprintf(depth_buffer, "+%d", ((yp_local_variable_read_node_t *)node)->depth);
yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_read_node_t *)node)->depth);
yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -1068,10 +1070,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
yp_buffer_append_str(buffer, "LocalVariableWriteNode(", 23);
char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ", ", 2); char depth_buffer[12];
sprintf(depth_buffer, "+%d", ((yp_local_variable_write_node_t *)node)->depth);
yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_write_node_t *)node)->depth);
yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
yp_buffer_append_str(buffer, ", ", 2); if (((yp_local_variable_write_node_t *)node)->value == NULL) {
yp_buffer_append_str(buffer, "nil", 3);
@ -1113,7 +1115,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_module_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_module_node_t *)node)->locals.ids[index]);
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_module_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
}
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_module_node_t *)node)->module_keyword_loc);
@ -1187,7 +1189,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
case YP_NODE_OPTIONAL_PARAMETER_NODE: {
yp_buffer_append_str(buffer, "OptionalParameterNode(", 22);
char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->name_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->operator_loc);
@ -1298,7 +1300,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_program_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_program_node_t *)node)->locals.ids[index]);
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_program_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
}
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_program_node_t *)node)->statements);
@ -1319,7 +1321,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
}
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_range_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
sprintf(flags_buffer, "+%d", ((yp_range_node_t *)node)->flags);
yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_range_node_t *)node)->flags);
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -1344,7 +1346,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, yp_string_source(&((yp_regular_expression_node_t *)node)->unescaped), yp_string_length(&((yp_regular_expression_node_t *)node)->unescaped));
yp_buffer_append_str(buffer, "\"", 1);
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
sprintf(flags_buffer, "+%d", ((yp_regular_expression_node_t *)node)->flags);
yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_regular_expression_node_t *)node)->flags);
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -1363,7 +1365,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
case YP_NODE_REQUIRED_PARAMETER_NODE: {
yp_buffer_append_str(buffer, "RequiredParameterNode(", 22);
char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_required_parameter_node_t *)node)->constant_id);
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_required_parameter_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ")", 1);
break;
@ -1443,7 +1445,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_singleton_class_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
}
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_singleton_class_node_t *)node)->class_keyword_loc);

View File

@ -374,7 +374,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
case '#': { // inline comments
bool found = yp_regexp_char_find(parser, ')');
// the close paren we found is escaped, we need to find another
while (parser->start <= parser->cursor - 2 && *(parser->cursor - 2) == '\\') {
while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
found = yp_regexp_char_find(parser, ')');
}
return found;

View File

@ -2,15 +2,14 @@
#define YARP_REGEXP_H
#include "yarp/defines.h"
#include "yarp/parser.h"
#include "yarp/util/yp_string_list.h"
#include "yarp/util/yp_string.h"
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include "yarp/util/yp_string_list.h"
#include "yarp/util/yp_string.h"
// Parse a regular expression and extract the names of all of the named capture
// groups.
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures);

View File

@ -438,14 +438,14 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
// \c? or \C-? delete, ASCII 7Fh (DEL)
//
YP_EXPORTED_FUNCTION void
yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
if (unescape_type == YP_UNESCAPE_NONE) {
// If we're not unescaping then we can reference the source directly.
yp_string_shared_init(string, value, value + length);
return;
}
const char *backslash = memchr(value, '\\', length);
const char *backslash = yp_memchr(parser, value, '\\', length);
if (backslash == NULL) {
// Here there are no escapes, so we can reference the source directly.
@ -509,7 +509,7 @@ yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *str
}
if (end > cursor) {
backslash = memchr(cursor, '\\', (size_t) (end - cursor));
backslash = yp_memchr(parser, cursor, '\\', (size_t) (end - cursor));
} else {
backslash = NULL;
}

View File

@ -2,17 +2,18 @@
#define YARP_UNESCAPE_H
#include "yarp/defines.h"
#include "yarp/diagnostic.h"
#include "yarp/parser.h"
#include "yarp/util/yp_char.h"
#include "yarp/util/yp_list.h"
#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_string.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include "yarp/diagnostic.h"
#include "yarp/util/yp_char.h"
#include "yarp/util/yp_list.h"
#include "yarp/util/yp_string.h"
// The type of unescape we are performing.
typedef enum {
// When we're creating a string inside of a list literal like %w, we
@ -30,7 +31,7 @@ typedef enum {
// Unescape the contents of the given token into the given string using the
// given unescape mode.
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
YP_EXPORTED_FUNCTION size_t yp_unescape_calculate_difference(const char *value, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list);

View File

@ -2,12 +2,11 @@
#define YP_CHAR_H
#include "yarp/defines.h"
#include "yarp/util/yp_newline_list.h"
#include <stdbool.h>
#include <stddef.h>
#include "yarp/util/yp_newline_list.h"
// Returns the number of characters at the start of the string that are
// whitespace. Disallows searching past the given maximum number of characters.
size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);

View File

@ -6,13 +6,13 @@
#ifndef YP_CONSTANT_POOL_H
#define YP_CONSTANT_POOL_H
#include "yarp/defines.h"
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "yarp/defines.h"
typedef uint32_t yp_constant_id_t;
typedef struct {

31
yarp/util/yp_memchr.c Normal file
View File

@ -0,0 +1,31 @@
#include "yarp/util/yp_memchr.h"
#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
// We need to roll our own memchr to handle cases where the encoding changes and
// we need to search for a character in a buffer that could be the trailing byte
// of a multibyte character.
//
// parser    - consulted for encoding_changed, encoding.multibyte and
//             encoding.char_width.
// memory    - start of the buffer to search.
// character - the byte to look for. As with memchr, it is compared as an
//             unsigned char.
// number    - the maximum number of bytes to examine.
//
// Returns a pointer to the first matching byte, or NULL if no match is found.
// On the slow path NULL is also returned as soon as char_width reports a width
// of 0 (presumably an invalid or truncated sequence - confirm against the
// encoding implementations).
void *
yp_memchr(yp_parser_t *parser, const void *memory, int character, size_t number) {
    if (parser->encoding_changed && parser->encoding.multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
        const char *source = (const char *) memory;
        // Compare bytes as unsigned char, matching memchr's contract. Plain
        // char may be signed, in which case a byte in the range 0x80-0xFF
        // would be negative and could never equal the int needle.
        const unsigned char needle = (unsigned char) character;
        size_t index = 0;

        while (index < number) {
            if ((unsigned char) source[index] == needle) {
                return (void *) (source + index);
            }

            // Step over the whole character so that its trailing bytes are
            // never inspected on their own.
            size_t width = parser->encoding.char_width(source + index);
            if (width == 0) {
                return NULL;
            }

            index += width;
        }

        return NULL;
    } else {
        // Fast path: either the encoding is unchanged/single-byte, or the
        // needle is below the trailing byte minimum, so the standard memchr
        // is used directly.
        return memchr(memory, character, number);
    }
}

14
yarp/util/yp_memchr.h Normal file
View File

@ -0,0 +1,14 @@
#ifndef YP_MEMCHR_H
#define YP_MEMCHR_H
#include "yarp/defines.h"
#include "yarp/parser.h"
#include <stddef.h>
// We need to roll our own memchr to handle cases where the encoding changes and
// we need to search for a character in a buffer that could be the trailing byte
// of a multibyte character.
void * yp_memchr(yp_parser_t *parser, const void *source, int character, size_t number);
#endif

View File

@ -31,7 +31,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
}
assert(cursor >= list->start);
list->offsets[list->size++] = (size_t) (cursor - list->start);
list->offsets[list->size++] = (size_t) (cursor - list->start + 1);
return true;
}

View File

@ -9,13 +9,13 @@
#ifndef YP_NEWLINE_LIST_H
#define YP_NEWLINE_LIST_H
#include <assert.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include "yarp/defines.h"
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
// A list of offsets of newlines in a string. The offsets are assumed to be
// sorted/inserted in ascending order.
typedef struct {

14
yarp/util/yp_snprintf.c Normal file
View File

@ -0,0 +1,14 @@
#include "yarp/defines.h"
#ifndef HAVE_SNPRINTF
// Fallback used when snprintf isn't present on the system. The size argument
// is accepted so the signature lines up with snprintf, but it is not enforced:
// formatting is handed to the less-safe vsprintf, so the caller must guarantee
// that dest is large enough. Returns whatever vsprintf returns.
int
yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...) {
    va_list arguments;
    int written;

    va_start(arguments, format);
    written = vsprintf(dest, format, arguments);
    va_end(arguments);

    return written;
}
#endif

View File

@ -1,11 +1,5 @@
#include "yarp/util/yp_string.h"
// Allocate a new yp_string_t.
yp_string_t *
yp_string_alloc(void) {
return (yp_string_t *) malloc(sizeof(yp_string_t));
}
// Initialize a shared string that is based on initial input.
void
yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {

View File

@ -29,9 +29,6 @@ typedef struct {
} as;
} yp_string_t;
// Allocate a new yp_string_t.
yp_string_t * yp_string_alloc(void);
// Initialize a shared string that is based on initial input.
void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);

View File

@ -2,12 +2,11 @@
#define YARP_STRING_LIST_H
#include "yarp/defines.h"
#include "yarp/util/yp_string.h"
#include <stddef.h>
#include <stdlib.h>
#include "yarp/util/yp_string.h"
typedef struct {
yp_string_t *strings;
size_t length;

View File

@ -1,19 +1,5 @@
#include "yarp/missing.h"
const char *
yp_strnstr(const char *haystack, const char *needle, size_t length) {
size_t needle_length = strlen(needle);
if (needle_length > length) return NULL;
const char *haystack_limit = haystack + length - needle_length + 1;
while ((haystack = memchr(haystack, needle[0], (size_t) (haystack_limit - haystack))) != NULL) {
if (!strncmp(haystack, needle, needle_length)) return haystack;
haystack++;
}
return NULL;
}
#include <ctype.h>
#include <stddef.h>
int
yp_strncasecmp(const char *string1, const char *string2, size_t length) {

View File

@ -1,5 +1,42 @@
#include "yarp/util/yp_strpbrk.h"
// Encoding-aware slow path. Only the first byte of each character is tested
// against charset; the scan then advances by the width reported by the
// parser's encoding so that the remaining bytes of that character are skipped.
// Returns a pointer to the first matching byte, or NULL if the scan reaches
// maximum bytes or the encoding reports a width of 0.
//
// Note that strchr also matches the terminator of charset, so a NUL byte in
// source is reported as a match.
static inline const char *
yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
    for (size_t offset = 0; offset < maximum;) {
        const char *cursor = source + offset;

        if (strchr(charset, *cursor) != NULL) {
            return cursor;
        }

        size_t advance = parser->encoding.char_width(cursor);
        if (advance == 0) {
            break;
        }

        offset += advance;
    }

    return NULL;
}
// Encoding-agnostic fast path. Every byte of source, up to maximum bytes, is
// tested against charset. Returns a pointer to the first matching byte, or
// NULL if none of the examined bytes match.
//
// Note that strchr also matches the terminator of charset, so a NUL byte in
// source is reported as a match.
static inline const char *
yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
    for (size_t offset = 0; offset < maximum; offset++) {
        const char *cursor = source + offset;

        if (strchr(charset, *cursor) != NULL) {
            return cursor;
        }
    }

    return NULL;
}
// Here we have rolled our own version of strpbrk. The standard library strpbrk
// has undefined behavior when the source string is not null-terminated. We want
// to support strings that are not null-terminated because yp_parse does not
@ -12,19 +49,18 @@
// also don't want it to stop on null bytes. Ruby actually allows null bytes
// within strings, comments, regular expressions, etc. So we need to be able to
// skip past them.
//
// Finally, we want to support encodings wherein the charset could contain
// characters that are trailing bytes of multi-byte characters. For example, in
// Shift-JIS, the backslash character can be a trailing byte. In that case we
// need to take a slower path and iterate one multi-byte character at a time.
const char *
yp_strpbrk(const char *source, const char *charset, ptrdiff_t length) {
if (length < 0) return NULL;
size_t index = 0;
size_t maximum = (size_t) length;
while (index < maximum) {
if (strchr(charset, source[index]) != NULL) {
return &source[index];
}
index++;
yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
if (length <= 0) {
return NULL;
} else if (parser->encoding_changed && parser->encoding.multibyte) {
return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
} else {
return yp_strpbrk_single_byte(source, charset, (size_t) length);
}
return NULL;
}

View File

@ -2,6 +2,7 @@
#define YP_STRPBRK_H
#include "yarp/defines.h"
#include "yarp/parser.h"
#include <stddef.h>
#include <string.h>
@ -18,6 +19,11 @@
// also don't want it to stop on null bytes. Ruby actually allows null bytes
// within strings, comments, regular expressions, etc. So we need to be able to
// skip past them.
const char * yp_strpbrk(const char *source, const char *charset, ptrdiff_t length);
//
// Finally, we want to support encodings wherein the charset could contain
// characters that are trailing bytes of multi-byte characters. For example, in
// Shift-JIS, the backslash character can be a trailing byte. In that case we
// need to take a slower path and iterate one multi-byte character at a time.
const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);
#endif

5
yarp/version.h Normal file
View File

@ -0,0 +1,5 @@
#define YP_VERSION_MAJOR 0
#define YP_VERSION_MINOR 4
#define YP_VERSION_PATCH 0
#define YP_VERSION "0.4.0"

View File

@ -1,16 +1,19 @@
#include "yarp.h"
#include "yarp/version.h"
#define YP_STRINGIZE0(expr) #expr
#define YP_STRINGIZE(expr) YP_STRINGIZE0(expr)
#define YP_VERSION_MACRO YP_STRINGIZE(YP_VERSION_MAJOR) "." YP_STRINGIZE(YP_VERSION_MINOR) "." YP_STRINGIZE(YP_VERSION_PATCH)
#define YP_TAB_WHITESPACE_SIZE 8
// The YARP version and the serialization format.
const char *
yp_version(void) {
return YP_VERSION_MACRO;
return YP_VERSION;
}
// In heredocs, tabs automatically complete up to the next 8 spaces. This is
// defined in CRuby as TAB_WIDTH.
#define YP_TAB_WHITESPACE_SIZE 8
// Debugging logging will provide you with additional debugging functions as
// well as automatically replace some functions with their debugging
// counterparts.
#ifndef YP_DEBUG_LOGGING
#define YP_DEBUG_LOGGING 0
#endif
@ -442,6 +445,7 @@ not_provided(yp_parser_t *parser) {
return (yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
}
#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_SHARED, .as.shared.start = NULL, .as.shared.end = NULL })
#define YP_LOCATION_NULL_VALUE(parser) ((yp_location_t) { .start = parser->start, .end = parser->start })
#define YP_LOCATION_TOKEN_VALUE(token) ((yp_location_t) { .start = (token)->start, .end = (token)->end })
#define YP_LOCATION_NODE_VALUE(node) ((yp_location_t) { .start = (node)->location.start, .end = (node)->location.end })
@ -675,7 +679,9 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
.constant = NULL,
.rest = NULL,
.requireds = YP_EMPTY_NODE_LIST,
.posts = YP_EMPTY_NODE_LIST
.posts = YP_EMPTY_NODE_LIST,
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
};
// For now we're going to just copy over each pointer manually. This could be
@ -684,7 +690,7 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
for (size_t index = 0; index < nodes->size; index++) {
yp_node_t *child = nodes->nodes[index];
if (child->type == YP_NODE_SPLAT_NODE) {
if (!found_rest && child->type == YP_NODE_SPLAT_NODE) {
node->rest = child;
found_rest = true;
} else if (found_rest) {
@ -710,7 +716,9 @@ yp_array_pattern_node_rest_create(yp_parser_t *parser, yp_node_t *rest) {
.constant = NULL,
.rest = rest,
.requireds = YP_EMPTY_NODE_LIST,
.posts = YP_EMPTY_NODE_LIST
.posts = YP_EMPTY_NODE_LIST,
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
};
return node;
@ -1885,7 +1893,9 @@ yp_find_pattern_node_create(yp_parser_t *parser, yp_node_list_t *nodes) {
.constant = NULL,
.left = left,
.right = right,
.requireds = YP_EMPTY_NODE_LIST
.requireds = YP_EMPTY_NODE_LIST,
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
};
// For now we're going to just copy over each pointer manually. This could be
@ -2018,7 +2028,9 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
},
.constant = NULL,
.kwrest = NULL,
.assocs = YP_EMPTY_NODE_LIST
.assocs = YP_EMPTY_NODE_LIST,
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
};
for (size_t index = 0; index < assocs->size; index++) {
@ -3709,7 +3721,8 @@ yp_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
},
.opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.content_loc = YP_LOCATION_TOKEN_VALUE(content),
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.unescaped = YP_EMPTY_STRING
};
return node;
@ -3766,7 +3779,8 @@ yp_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
},
.opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.value_loc = YP_LOCATION_TOKEN_VALUE(value),
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.unescaped = YP_EMPTY_STRING
};
return node;
@ -3788,7 +3802,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
ptrdiff_t length = label.end - label.start;
assert(length >= 0);
yp_unescape_manipulate_string(label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
yp_unescape_manipulate_string(parser, label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
break;
}
case YP_TOKEN_MISSING: {
@ -4073,7 +4087,8 @@ yp_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_
},
.opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
.content_loc = YP_LOCATION_TOKEN_VALUE(content),
.closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
.closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
.unescaped = YP_EMPTY_STRING
};
return node;
@ -4113,6 +4128,7 @@ yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_lo
}
#undef YP_EMPTY_STRING
#undef YP_LOCATION_NULL_VALUE
#undef YP_LOCATION_TOKEN_VALUE
#undef YP_LOCATION_NODE_VALUE
@ -4331,6 +4347,17 @@ peek(yp_parser_t *parser) {
}
}
// Get the next string of length len in the source starting from
// parser->current.end. If fewer than len bytes remain before parser->end,
// return the empty string "" instead.
//
// The returned pointer is not NUL-terminated at len bytes; callers should only
// read up to len bytes from it (or stop at the first NUL, which is what makes
// the "" fallback safe to pass to strncmp).
static inline const char*
peek_string(yp_parser_t *parser, size_t len) {
    // Compare against the number of bytes remaining rather than computing
    // parser->current.end + len, which could form a pointer beyond one past
    // the end of the source buffer (undefined behavior).
    ptrdiff_t remaining = parser->end - parser->current.end;

    if (remaining >= 0 && (size_t) remaining >= len) {
        return parser->current.end;
    } else {
        return "";
    }
}
// If the character to be read matches the given value, then returns true and
// advances the current pointer.
static inline bool
@ -4342,22 +4369,53 @@ match(yp_parser_t *parser, char value) {
return false;
}
// Find the next newline character within the length bytes starting at cursor.
// Returns a pointer to the '\n' byte, or NULL if there is none in that range.
// The length must not be negative.
static inline const char *
next_newline(const char *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain memchr is fine for \n because none of the encodings that we
    // support have \n as a component of a multi-byte character.
    return memchr(cursor, '\n', span);
}
// Locate the value of an encoding magic comment. Searches the remaining bytes
// starting at cursor for the word "coding" immediately followed by ':' or '='
// and returns a pointer to the byte just after that separator. Returns NULL
// if no such key fits within the given range. This is effectively an inlined
// strnstr that accepts either separator.
static inline const char *
parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
    assert(remaining >= 0);

    const size_t name_length = strlen("coding");
    const size_t key_length = name_length + 1; // the name plus its separator
    const size_t length = (size_t) remaining;

    if (length < key_length) return NULL;

    // One past the last position at which a complete key could still start.
    const char *limit = cursor + (length - key_length) + 1;

    for (
        const char *match = yp_memchr(parser, cursor, 'c', (size_t) (limit - cursor));
        match != NULL;
        match = yp_memchr(parser, match + 1, 'c', (size_t) (limit - (match + 1)))
    ) {
        // match is below limit, so the separator position is within range.
        const char separator = match[name_length];

        if ((separator == ':' || separator == '=') && (strncmp(match, "coding", name_length) == 0)) {
            return match + key_length;
        }
    }

    return NULL;
}
// Here we're going to check if this is a "magic" comment, and perform whatever
// actions are necessary for it here.
static void
parser_lex_encoding_comment(yp_parser_t *parser) {
const char *start = parser->current.start + 1;
const char *end = memchr(start, '\n', (size_t) (parser->end - start));
const char *end = next_newline(start, parser->end - start);
if (end == NULL) end = parser->end;
// These are the patterns we're going to match to find the encoding comment.
// This is definitely not complete or even really correct.
const char *encoding_start = NULL;
if ((encoding_start = yp_strnstr(start, "coding:", (size_t) (end - start))) != NULL) {
encoding_start += 7;
} else if ((encoding_start = yp_strnstr(start, "coding=", (size_t) (end - start))) != NULL) {
encoding_start += 7;
}
const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
// If we didn't find anything that matched our patterns, then return. Note
// that this does a _very_ poor job of actually finding the encoding, and
@ -4370,7 +4428,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
// Now determine the end of the encoding string. This is either the end of
// the line, the first whitespace character, or a punctuation mark.
const char *encoding_end = yp_strpbrk(encoding_start, " \t\f\r\v\n;,", end - encoding_start);
const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
encoding_end = encoding_end == NULL ? end : encoding_end;
// Finally, we can determine the width of the encoding string.
@ -4392,7 +4450,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
// Encodings like utf-8 can carry extra details in their name, such as
// utf-8-dos, utf-8-linux, or utf-8-mac. We treat any encoding name that
// starts with utf-8 as utf-8.
if (strncasecmp(encoding_start, "utf-8", 5) == 0) {
if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
// We don't need to do anything here because the default encoding is
// already UTF-8. We'll just return.
return;
@ -4401,7 +4459,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
// Next, we're going to loop through each of the encodings that we handle
// explicitly. If we found one that we understand, we'll use that value.
#define ENCODING(value, prebuilt) \
if (width == sizeof(value) - 1 && strncasecmp(encoding_start, value, sizeof(value) - 1) == 0) { \
if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
parser->encoding = prebuilt; \
parser->encoding_changed |= true; \
if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@ -4866,7 +4924,8 @@ static yp_token_type_t
lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_token_type_t type, yp_token_type_t modifier_type) {
yp_lex_state_t last_state = parser->lex_state;
if (strncmp(parser->current.start, value, strlen(value)) == 0) {
const size_t vlen = strlen(value);
if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
if (parser->lex_state & YP_LEX_STATE_FNAME) {
lex_state_set(parser, YP_LEX_STATE_ENDFN);
} else {
@ -5275,7 +5334,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
static yp_token_type_t
lex_embdoc(yp_parser_t *parser) {
// First, lex out the EMBDOC_BEGIN token.
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
if (newline == NULL) {
parser->current.end = parser->end;
@ -5300,7 +5359,7 @@ lex_embdoc(yp_parser_t *parser) {
// token here.
if (strncmp(parser->current.end, "=end", 4) == 0 &&
(parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
if (newline == NULL) {
parser->current.end = parser->end;
@ -5320,7 +5379,7 @@ lex_embdoc(yp_parser_t *parser) {
// Otherwise, we'll parse until the end of the line and return a line of
// embedded documentation.
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
if (newline == NULL) {
parser->current.end = parser->end;
@ -5466,9 +5525,9 @@ parser_lex(yp_parser_t *parser) {
LEX(YP_TOKEN_EOF);
case '#': { // comments
const char *ending = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
while (ending && ending < parser->end && *ending != '\n') {
ending = memchr(ending + 1, '\n', (size_t) (parser->end - ending));
ending = next_newline(ending + 1, parser->end - ending);
}
parser->current.end = ending == NULL ? parser->end : ending + 1;
@ -5540,7 +5599,7 @@ parser_lex(yp_parser_t *parser) {
// Otherwise we'll return a regular newline.
if (next_content[0] == '#') {
// Here we look for a "." or "&." following a "\n".
const char *following = memchr(next_content, '\n', (size_t) (parser->end - next_content));
const char *following = next_newline(next_content, parser->end - next_content);
while (following && (following < parser->end)) {
following++;
@ -5552,7 +5611,7 @@ parser_lex(yp_parser_t *parser) {
// If there is a comment, then we need to find the end of the
// comment and continue searching from there.
following = memchr(following, '\n', (size_t) (parser->end - following));
following = next_newline(following, parser->end - following);
}
// If the lex state was ignored, or we hit a '.' or a '&.',
@ -5785,7 +5844,7 @@ parser_lex(yp_parser_t *parser) {
// = => =~ == === =begin
case '=':
if (current_token_starts_line(parser) && strncmp(parser->current.end, "begin", 5) == 0 && yp_char_is_whitespace(parser->current.end[5])) {
if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
yp_token_type_t type = lex_embdoc(parser);
if (type == YP_TOKEN_EOF) {
@ -5848,19 +5907,21 @@ parser_lex(yp_parser_t *parser) {
const char *ident_start = parser->current.end;
size_t width = 0;
if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
if (parser->current.end >= parser->end) {
parser->current.end = end;
} else if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
parser->current.end = end;
} else {
if (quote == YP_HEREDOC_QUOTE_NONE) {
parser->current.end += width;
while ((width = char_is_identifier(parser, parser->current.end))) {
while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
parser->current.end += width;
}
} else {
// If we have quotes, then we're going to go until we find the
// end quote.
while (parser->current.end < parser->end && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
while ((parser->current.end < parser->end) && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
parser->current.end++;
}
}
@ -5882,7 +5943,7 @@ parser_lex(yp_parser_t *parser) {
});
if (parser->heredoc_end == NULL) {
const char *body_start = (const char *) memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
if (body_start == NULL) {
// If there is no newline after the heredoc identifier, then
@ -6465,13 +6526,13 @@ parser_lex(yp_parser_t *parser) {
// Here we'll get a list of the places where strpbrk should break,
// and then find the first one.
const char *breakpoints = parser->lex_modes.current->as.list.breakpoints;
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) {
switch (*breakpoint) {
case '\0':
// If we hit a null byte, skip directly past it.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\\': {
// If we hit escapes, then we need to treat the next token
@ -6492,7 +6553,7 @@ parser_lex(yp_parser_t *parser) {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break;
}
case ' ':
@ -6517,7 +6578,7 @@ parser_lex(yp_parser_t *parser) {
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint.
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
}
@ -6526,7 +6587,7 @@ parser_lex(yp_parser_t *parser) {
if (*breakpoint == parser->lex_modes.current->as.list.incrementor) {
// If we've hit the incrementor, then we need to skip past it and
// find the next breakpoint.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.list.nesting++;
break;
}
@ -6537,7 +6598,7 @@ parser_lex(yp_parser_t *parser) {
// If this terminator doesn't actually close the list, then we need
// to continue on past it.
if (parser->lex_modes.current->as.list.nesting > 0) {
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.list.nesting--;
break;
}
@ -6577,13 +6638,13 @@ parser_lex(yp_parser_t *parser) {
// regular expression. We'll use strpbrk to find the first of these
// characters.
const char *breakpoints = parser->lex_modes.current->as.regexp.breakpoints;
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) {
switch (*breakpoint) {
case '\0':
// If we hit a null byte, skip directly past it.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\\': {
// If we hit escapes, then we need to treat the next token
@ -6597,7 +6658,7 @@ parser_lex(yp_parser_t *parser) {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break;
}
case '#': {
@ -6613,7 +6674,7 @@ parser_lex(yp_parser_t *parser) {
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint.
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
}
@ -6622,7 +6683,7 @@ parser_lex(yp_parser_t *parser) {
if (*breakpoint == parser->lex_modes.current->as.regexp.incrementor) {
// If we've hit the incrementor, then we need to skip past it and
// find the next breakpoint.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.regexp.nesting++;
break;
}
@ -6635,7 +6696,7 @@ parser_lex(yp_parser_t *parser) {
if (parser->lex_modes.current->as.regexp.terminator != '\n') {
// If the terminator is not a newline, then we
// can set the next breakpoint and continue.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
}
@ -6646,7 +6707,7 @@ parser_lex(yp_parser_t *parser) {
assert(*breakpoint == parser->lex_modes.current->as.regexp.terminator);
if (parser->lex_modes.current->as.regexp.nesting > 0) {
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.regexp.nesting--;
break;
}
@ -6694,7 +6755,7 @@ parser_lex(yp_parser_t *parser) {
// These are the places where we need to split up the content of the
// string. We'll use strpbrk to find the first of these characters.
const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) {
// If we hit the incrementor, then we'll increment the nesting and
@ -6704,7 +6765,7 @@ parser_lex(yp_parser_t *parser) {
*breakpoint == parser->lex_modes.current->as.string.incrementor
) {
parser->lex_modes.current->as.string.nesting++;
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
continue;
}
@ -6715,7 +6776,7 @@ parser_lex(yp_parser_t *parser) {
// If this terminator doesn't actually close the string, then we need
// to continue on past it.
if (parser->lex_modes.current->as.string.nesting > 0) {
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.string.nesting--;
continue;
}
@ -6762,7 +6823,7 @@ parser_lex(yp_parser_t *parser) {
if (*breakpoint == '\n') {
if (parser->heredoc_end == NULL) {
yp_newline_list_append(&parser->newline_list, breakpoint);
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
continue;
} else {
parser->current.end = breakpoint + 1;
@ -6774,7 +6835,7 @@ parser_lex(yp_parser_t *parser) {
switch (*breakpoint) {
case '\0':
// Skip directly past the null character.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\\': {
// If we hit escapes, then we need to treat the next token
@ -6789,7 +6850,7 @@ parser_lex(yp_parser_t *parser) {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break;
}
case '#': {
@ -6802,7 +6863,7 @@ parser_lex(yp_parser_t *parser) {
// looked like an interpolated class or instance variable like "#@"
// but wasn't actually. In this case we'll just skip to the next
// breakpoint.
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
default:
@ -6844,7 +6905,7 @@ parser_lex(yp_parser_t *parser) {
start += yp_strspn_inline_whitespace(start, parser->end - start);
}
if (strncmp(start, ident_start, ident_length) == 0) {
if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
bool matched = true;
bool at_end = false;
@ -6888,13 +6949,13 @@ parser_lex(yp_parser_t *parser) {
breakpoints[2] = '\0';
}
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) {
switch (*breakpoint) {
case '\0':
// Skip directly past the null character.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
case '\n': {
yp_newline_list_append(&parser->newline_list, breakpoint);
@ -6939,7 +7000,7 @@ parser_lex(yp_parser_t *parser) {
// Otherwise we hit a newline and it wasn't followed by a
// terminator, so we can continue parsing.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break;
}
case '\\': {
@ -6956,7 +7017,7 @@ parser_lex(yp_parser_t *parser) {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
}
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
}
break;
}
@ -6970,7 +7031,7 @@ parser_lex(yp_parser_t *parser) {
// that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint.
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break;
}
default:
@ -7007,7 +7068,7 @@ yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_tok
ptrdiff_t length = content->end - content->start;
assert(length >= 0);
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
return node;
}
@ -7018,7 +7079,7 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
ptrdiff_t length = content->end - content->start;
assert(length >= 0);
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
return node;
}
@ -7029,7 +7090,7 @@ yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
ptrdiff_t length = content->end - content->start;
assert(length >= 0);
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
return node;
}
@ -7040,7 +7101,7 @@ yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openi
ptrdiff_t length = content->end - content->start;
assert(length >= 0);
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
return node;
}
@ -7505,10 +7566,10 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
// the previous method name in, and append an =.
size_t length = yp_string_length(&call->name);
char *name = malloc(length + 2);
char *name = calloc(length + 2, sizeof(char));
if (name == NULL) return NULL;
sprintf(name, "%.*s=", (int) length, yp_string_source(&call->name));
yp_snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
// Now switch the name to the new string.
yp_string_free(&call->name);
@ -8954,9 +9015,11 @@ parse_string_part(yp_parser_t *parser) {
static yp_node_t *
parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
bool lex_string = lex_mode->mode == YP_LEX_STRING;
bool lex_interpolation = lex_string && lex_mode->as.string.interpolation;
yp_token_t opening = parser->previous;
if (lex_mode->mode != YP_LEX_STRING) {
if (!lex_string) {
if (next_state != YP_LEX_STATE_NONE) {
lex_state_set(parser, next_state);
}
@ -8990,9 +9053,9 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
}
// If we weren't in a string in the previous check then we have to be now.
assert(lex_mode->mode == YP_LEX_STRING);
assert(lex_string);
if (lex_mode->as.string.interpolation) {
if (lex_interpolation) {
yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
@ -9043,9 +9106,10 @@ parse_undef_argument(yp_parser_t *parser) {
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
}
case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
return parse_symbol(parser, lex_mode, YP_LEX_STATE_NONE);
return parse_symbol(parser, &lex_mode, YP_LEX_STATE_NONE);
}
default:
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a bare word or symbol argument.");
@ -9075,10 +9139,10 @@ parse_alias_argument(yp_parser_t *parser, bool first) {
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
}
case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
return parse_symbol(parser, lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
return parse_symbol(parser, &lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
}
case YP_TOKEN_BACK_REFERENCE:
parser_lex(parser);
@ -9177,7 +9241,7 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
common_whitespace = cur_whitespace;
}
cur_char = memchr(cur_char + 1, '\n', (size_t) (parser->end - (cur_char + 1)));
cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
if (cur_char) cur_char++;
}
}
@ -9252,7 +9316,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
// At this point we have dedented all that we need to, so we need to find
// the next newline.
const char *breakpoint = memchr(source_cursor, '\n', (size_t) (source_end - source_cursor));
const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);
if (breakpoint == NULL) {
// If there isn't another newline, then we can just move the rest of the
@ -9293,92 +9357,106 @@ parse_pattern_constant_path(yp_parser_t *parser, yp_node_t *node) {
// If there is a [ or ( that follows, then this is part of a larger pattern
// expression. We'll parse the inner pattern here, then modify the returned
// inner pattern with our constant path attached.
if (match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
yp_token_t opening;
yp_token_t closing;
yp_node_t *inner = NULL;
if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
opening = parser->previous;
accept(parser, YP_TOKEN_NEWLINE);
if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
}
closing = parser->previous;
} else {
parser_lex(parser);
opening = parser->previous;
if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
}
closing = parser->previous;
}
if (inner) {
// Now that we have the inner pattern, check to see if it's an array, find,
// or hash pattern. If it is, then we'll attach our constant path to it. If
// it's not, then we'll create an array pattern.
switch (inner->type) {
case YP_NODE_ARRAY_PATTERN_NODE: {
yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *)inner;
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
node = (yp_node_t *)pattern_node;
break;
}
case YP_NODE_FIND_PATTERN_NODE: {
yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
node = (yp_node_t *) pattern_node;
break;
}
case YP_NODE_HASH_PATTERN_NODE: {
yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *)inner;
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
node = (yp_node_t *) pattern_node;
break;
}
default: {
yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
yp_array_pattern_node_requireds_append(pattern_node, inner);
node = (yp_node_t *)pattern_node;
break;
}
}
} else {
// If there was no inner pattern, then we have something like Foo() or
// Foo[]. In that case we'll create an array pattern with no requireds.
node = (yp_node_t *)yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
}
if (!match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
return node;
}
return node;
yp_token_t opening;
yp_token_t closing;
yp_node_t *inner = NULL;
if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
opening = parser->previous;
accept(parser, YP_TOKEN_NEWLINE);
if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
}
closing = parser->previous;
} else {
parser_lex(parser);
opening = parser->previous;
if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
}
closing = parser->previous;
}
if (!inner) {
// If there was no inner pattern, then we have something like Foo() or
// Foo[]. In that case we'll create an array pattern with no requireds.
return (yp_node_t *) yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
}
// Now that we have the inner pattern, check to see if it's an array, find,
// or hash pattern. If it is, then we'll attach our constant path to it if
// it doesn't already have a constant. If it's not one of those node types
// or it does have a constant, then we'll create an array pattern.
switch (inner->type) {
case YP_NODE_ARRAY_PATTERN_NODE: {
yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner;
if (pattern_node->constant == NULL) {
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
return (yp_node_t *) pattern_node;
}
break;
}
case YP_NODE_FIND_PATTERN_NODE: {
yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
if (pattern_node->constant == NULL) {
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
return (yp_node_t *) pattern_node;
}
break;
}
case YP_NODE_HASH_PATTERN_NODE: {
yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *) inner;
if (pattern_node->constant == NULL) {
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
return (yp_node_t *) pattern_node;
}
break;
}
default:
break;
}
// If we got here, then we didn't return one of the inner patterns by
// attaching its constant. In this case we'll create an array pattern and
// attach our constant to it.
yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
yp_array_pattern_node_requireds_append(pattern_node, inner);
return (yp_node_t *) pattern_node;
}
// Parse a rest pattern.
@ -9897,8 +9975,6 @@ parse_pattern(yp_parser_t *parser, bool top_pattern, const char *message) {
// Parse an expression that begins with the previous node that we just lexed.
static inline yp_node_t *
parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
switch (parser->current.type) {
case YP_TOKEN_BRACKET_LEFT_ARRAY: {
parser_lex(parser);
@ -11015,7 +11091,10 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM);
parser_lex(parser);
name = parse_undef_argument(parser);
if (name->type == YP_NODE_MISSING_NODE) break;
if (name->type == YP_NODE_MISSING_NODE) {
yp_node_destroy(parser, name);
break;
}
yp_undef_node_append(undef, name);
}
@ -11043,6 +11122,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
receiver = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected expression after `not`.");
if (!parser->recovering) {
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after 'not' expression.");
arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
}
@ -11727,9 +11807,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
return (yp_node_t *) node;
}
case YP_TOKEN_STRING_BEGIN: {
assert(parser->lex_modes.current->mode == YP_LEX_STRING);
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
yp_token_t opening = parser->current;
parser_lex(parser);
yp_token_t opening = parser->previous;
yp_node_t *node;
if (accept(parser, YP_TOKEN_STRING_END)) {
@ -11754,7 +11837,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
};
return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
} else if (!lex_mode->as.string.interpolation) {
} else if (!lex_interpolation) {
// If we don't accept interpolation then we expect the string to start
// with a single string content node.
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
@ -11858,9 +11941,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
return node;
}
}
case YP_TOKEN_SYMBOL_BEGIN:
case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser);
return parse_symbol(parser, lex_mode, YP_LEX_STATE_END);
return parse_symbol(parser, &lex_mode, YP_LEX_STATE_END);
}
default:
if (context_recoverable(parser, &parser->current)) {
parser->recovering = true;
@ -12482,82 +12568,8 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
return path;
}
case YP_TOKEN_AMPERSAND:
case YP_TOKEN_BACKTICK:
case YP_TOKEN_BANG:
case YP_TOKEN_BANG_EQUAL:
case YP_TOKEN_BANG_TILDE:
case YP_TOKEN_CARET:
case YP_TOKEN_EQUAL_EQUAL:
case YP_TOKEN_EQUAL_EQUAL_EQUAL:
case YP_TOKEN_EQUAL_TILDE:
case YP_TOKEN_GREATER:
case YP_TOKEN_GREATER_EQUAL:
case YP_TOKEN_GREATER_GREATER:
case YP_TOKEN_HEREDOC_START:
case YP_TOKEN_IGNORED_NEWLINE:
case YP_TOKEN_KEYWORD_ALIAS:
case YP_TOKEN_KEYWORD_AND:
case YP_TOKEN_KEYWORD_BEGIN:
case YP_TOKEN_KEYWORD_BEGIN_UPCASE:
case YP_TOKEN_KEYWORD_BREAK:
case YP_TOKEN_KEYWORD_CASE:
case YP_TOKEN_KEYWORD_CLASS:
case YP_TOKEN_KEYWORD_DEF:
case YP_TOKEN_KEYWORD_DEFINED:
case YP_TOKEN_KEYWORD_DO:
case YP_TOKEN_KEYWORD_ELSE:
case YP_TOKEN_KEYWORD_ELSIF:
case YP_TOKEN_KEYWORD_END:
case YP_TOKEN_KEYWORD_END_UPCASE:
case YP_TOKEN_KEYWORD_ENSURE:
case YP_TOKEN_KEYWORD_FALSE:
case YP_TOKEN_KEYWORD_FOR:
case YP_TOKEN_KEYWORD_IF:
case YP_TOKEN_KEYWORD_IN:
case YP_TOKEN_KEYWORD_NEXT:
case YP_TOKEN_KEYWORD_NIL:
case YP_TOKEN_KEYWORD_NOT:
case YP_TOKEN_KEYWORD_OR:
case YP_TOKEN_KEYWORD_REDO:
case YP_TOKEN_KEYWORD_RESCUE:
case YP_TOKEN_KEYWORD_RETRY:
case YP_TOKEN_KEYWORD_RETURN:
case YP_TOKEN_KEYWORD_SELF:
case YP_TOKEN_KEYWORD_SUPER:
case YP_TOKEN_KEYWORD_THEN:
case YP_TOKEN_KEYWORD_TRUE:
case YP_TOKEN_KEYWORD_UNDEF:
case YP_TOKEN_KEYWORD_UNLESS:
case YP_TOKEN_KEYWORD_UNTIL:
case YP_TOKEN_KEYWORD_WHEN:
case YP_TOKEN_KEYWORD_WHILE:
case YP_TOKEN_KEYWORD_YIELD:
case YP_TOKEN_KEYWORD___ENCODING__:
case YP_TOKEN_KEYWORD___FILE__:
case YP_TOKEN_KEYWORD___LINE__:
case YP_TOKEN_LESS:
case YP_TOKEN_LESS_EQUAL:
case YP_TOKEN_LESS_EQUAL_GREATER:
case YP_TOKEN_LESS_LESS:
case YP_TOKEN_MINUS:
case YP_TOKEN_PERCENT:
case YP_TOKEN_PERCENT_LOWER_I:
case YP_TOKEN_PERCENT_LOWER_W:
case YP_TOKEN_PERCENT_LOWER_X:
case YP_TOKEN_PERCENT_UPPER_I:
case YP_TOKEN_PERCENT_UPPER_W:
case YP_TOKEN_PIPE:
case YP_TOKEN_PLUS:
case YP_TOKEN_REGEXP_BEGIN:
case YP_TOKEN_SLASH:
case YP_TOKEN_STAR:
case YP_TOKEN_STAR_STAR:
case YP_TOKEN_TILDE:
case YP_TOKEN_UCOLON_COLON:
case YP_TOKEN_UDOT_DOT:
case YP_TOKEN_UDOT_DOT_DOT:
case YP_TOKEN___END__:
case YP_CASE_OPERATOR:
case YP_CASE_KEYWORD:
case YP_TOKEN_IDENTIFIER: {
parser_lex(parser);
@ -12805,7 +12817,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
} else if (size >= 2 && source[0] == '#' && source[1] == '!') {
// If the first two bytes of the source are a shebang, then we'll indicate
// that the encoding comment is at the end of the shebang.
const char *encoding_comment_start = memchr(source, '\n', size);
const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
if (encoding_comment_start) {
parser->encoding_comment_start = encoding_comment_start + 1;
}
@ -12891,6 +12903,3 @@ yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
#undef YP_CASE_KEYWORD
#undef YP_CASE_OPERATOR
#undef YP_CASE_WRITABLE
#undef YP_STRINGIZE
#undef YP_STRINGIZE0
#undef YP_VERSION_MACRO

View File

@ -2,19 +2,6 @@
#define YARP_H
#include "yarp/defines.h"
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef _WIN32
#include <strings.h>
#endif
#include "yarp/missing.h"
#include "yarp/ast.h"
#include "yarp/diagnostic.h"
#include "yarp/node.h"
@ -24,17 +11,26 @@
#include "yarp/unescape.h"
#include "yarp/util/yp_buffer.h"
#include "yarp/util/yp_char.h"
#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_strpbrk.h"
#define YP_VERSION_MAJOR 0
#define YP_VERSION_MINOR 4
#define YP_VERSION_PATCH 0
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef _WIN32
#include <strings.h>
#endif
void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
void yp_print_node(yp_parser_t *parser, yp_node_t *node);
// Returns the YARP version and notably the serialization format
// The YARP version and the serialization format.
YP_EXPORTED_FUNCTION const char * yp_version(void);
// Initialize a parser with the given start and end pointers.
@ -57,20 +53,6 @@ YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
// Parse the Ruby source associated with the given parser and return the tree.
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
// Deallocate a node and all of its children.
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
// This struct stores the information gathered by the yp_node_memsize function.
// It contains both the memory footprint and additional metadata about the
// shape of the tree.
typedef struct {
size_t memsize;
size_t node_count;
} yp_memsize_t;
// Calculates the memory footprint of a given node.
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
// Pretty-prints the AST represented by the given node to the given buffer.
YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);