Manual YARP resync

This commit is contained in:
Jemma Issroff 2023-06-30 14:30:24 -04:00
parent 6f9d1b4b0f
commit bfb933371d
Notes: git 2023-07-05 20:59:16 +00:00
77 changed files with 5222 additions and 5751 deletions

View File

@ -1,17 +1,79 @@
# frozen_string_literal: true # frozen_string_literal: true
module YARP module YARP
# This represents a location in the source corresponding to a node or token. # This represents a source of Ruby code that has been parsed. It is used in
class Location # conjunction with locations to allow them to resolve line numbers and source
attr_reader :start_offset, :length # ranges.
class Source
  attr_reader :source, :offsets

  # source is the string of code; offsets is an ascending list of byte
  # offsets (the binary search in #line relies on that ordering).
  def initialize(source, offsets)
    @source, @offsets = source, offsets
  end

  # Returns the bytes of the source starting at the given byte offset and
  # running for the given length.
  def slice(offset, length)
    source.byteslice(offset, length)
  end

  # Returns the index of the first entry in offsets that is strictly greater
  # than the given value. When no entry is greater, the number of entries is
  # returned instead.
  def line(value)
    found = offsets.bsearch_index { |line_start| line_start > value }
    found || offsets.length
  end

  # Returns the distance in bytes between the given value and the entry in
  # offsets that immediately precedes the one located by #line.
  def column(value)
    line_start = offsets[line(value) - 1]
    value - line_start
  end
end
# This represents a location in the source.
class Location
# A Source object that is used to determine more information from the given
# offset and length.
private attr_reader :source
# The byte offset from the beginning of the source where this location
# starts.
attr_reader :start_offset
# The length of this location in bytes.
attr_reader :length
def initialize(source, start_offset, length)
@source = source
@start_offset = start_offset @start_offset = start_offset
@length = length @length = length
end end
# The source code that this location represents.
def slice
source.slice(start_offset, length)
end
# The byte offset from the beginning of the source where this location ends.
def end_offset def end_offset
@start_offset + @length start_offset + length
end
# The line number where this location starts.
def start_line
source.line(start_offset)
end
# The line number where this location ends.
def end_line
source.line(end_offset - 1)
end
# The column number in bytes where this location starts from the start of
# the line.
def start_column
source.column(start_offset)
end
# The column number in bytes where this location ends from the start of the
# line.
def end_column
source.column(end_offset - 1)
end end
def deconstruct_keys(keys) def deconstruct_keys(keys)
@ -101,21 +163,12 @@ module YARP
# This represents a token from the Ruby source. # This represents a token from the Ruby source.
class Token class Token
attr_reader :type, :value, :start_offset, :length attr_reader :type, :value, :location
def initialize(type, value, start_offset, length) def initialize(type, value, location)
@type = type @type = type
@value = value @value = value
@start_offset = start_offset @location = location
@length = length
end
def end_offset
@start_offset + @length
end
def location
Location.new(@start_offset, @length)
end end
def deconstruct_keys(keys) def deconstruct_keys(keys)
@ -143,20 +196,12 @@ module YARP
# This represents a node in the tree. # This represents a node in the tree.
class Node class Node
attr_reader :start_offset, :length attr_reader :location
def end_offset
@start_offset + @length
end
def location
Location.new(@start_offset, @length)
end
def pretty_print(q) def pretty_print(q)
q.group do q.group do
q.text(self.class.name.split("::").last) q.text(self.class.name.split("::").last)
self.location.pretty_print(q) location.pretty_print(q)
q.text("(") q.text("(")
q.nest(2) do q.nest(2) do
deconstructed = deconstruct_keys([]) deconstructed = deconstruct_keys([])
@ -171,67 +216,10 @@ module YARP
end end
end end
# A class that knows how to walk down the tree. None of the individual visit
# methods are implemented on this visitor, so it forces the consumer to
# implement each one that they need. For a default implementation that
# continues walking the tree, see the Visitor class.
class BasicVisitor
  # Hands this visitor to the node's accept method and returns whatever it
  # returns. A nil node is tolerated and yields nil.
  def visit(node)
    return if node.nil?

    node.accept(self)
  end

  # Visits every entry of the given list in order and collects the results
  # into a new array.
  def visit_all(nodes)
    nodes.map { |child| visit(child) }
  end

  # Visits everything returned by the node's child_nodes method.
  def visit_child_nodes(node)
    children = node.child_nodes
    visit_all(children)
  end
end
# This lexes with the Ripper lex. It drops any space events but otherwise
# returns the same tokens.
# [raises SyntaxError] if the syntax in source is invalid
def self.lex_ripper(source)
  Ripper.lex(source, raise_errors: true).each_with_object([]) do |token, kept|
    event = token[1]
    next if event == :on_sp # whitespace events are dropped entirely

    # A token is folded into the previously kept one only when both carry the
    # same event and that event is one of the two mergeable kinds.
    tail = kept.last
    absorb =
      if tail.nil? || tail[1] != event
        false
      elsif event == :on_tstring_content
        token[2].start_with?("\#$", "\#@")
      else
        event == :on_words_sep
      end

    if absorb
      tail[2] << token[2]
    else
      kept << token
    end
  end
end
# Load the serialized AST using the source as a reference into a tree. # Load the serialized AST using the source as a reference into a tree.
def self.load(source, serialized) def self.load(source, serialized)
Serialize.load(source, serialized) Serialize.load(source, serialized)
end end
# Forwards the source and the optional filepath (nil when omitted) to _parse
# and returns its result. _parse is not defined in this view; presumably it
# comes from the native extension -- confirm.
def self.parse(source, filepath = nil) = _parse(source, filepath)
end end
require_relative "yarp/lex_compat" require_relative "yarp/lex_compat"
@ -240,9 +228,3 @@ require_relative "yarp/ripper_compat"
require_relative "yarp/serialize" require_relative "yarp/serialize"
require_relative "yarp/pack" require_relative "yarp/pack"
require "yarp.so" require "yarp.so"
module YARP
class << self
private :_parse
end
end

View File

@ -534,12 +534,11 @@ module YARP
end end
end end
attr_reader :source, :offsets, :filepath attr_reader :source, :filepath
def initialize(source, filepath = "") def initialize(source, filepath = "")
@source = source @source = source
@filepath = filepath || "" @filepath = filepath || ""
@offsets = find_offsets(source)
end end
def result def result
@ -561,7 +560,8 @@ module YARP
result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
result_value.each_with_index do |(token, lex_state), index| result_value.each_with_index do |(token, lex_state), index|
(lineno, column) = find_location(token.location.start_offset) lineno = token.location.start_line
column = token.location.start_column
column -= index == 0 ? 6 : 3 if bom && lineno == 1 column -= index == 0 ? 6 : 3 if bom && lineno == 1
event = RIPPER.fetch(token.type) event = RIPPER.fetch(token.type)
@ -702,38 +702,6 @@ module YARP
ParseResult.new(tokens, result.comments, result.errors, result.warnings) ParseResult.new(tokens, result.comments, result.errors, result.warnings)
end end
private
# YARP keeps locations around in the form of ranges of byte offsets from the
# start of the file. Ripper keeps locations around in the form of line and
# column numbers. To match the output, we keep a cache of the offsets at the
# beginning of each line.
def find_offsets(source)
  # Start from offset 0 and append a running total of line byte sizes, so the
  # result has one more entry than the source has lines.
  source.each_line.each_with_object([0]) do |line, starts|
    starts << starts.last + line.bytesize
  end
end
# Given a byte offset, find the line number and column number that it maps
# to. We use a binary search over the cached offsets to find the line number
# that the offset is on, and then subtract the offset of the previous line
# to find the column number.
def find_location(value)
  index = offsets.bsearch_index { |line_start| line_start > value }

  if index
    # Some cached offset lies beyond the value: its index is the line number
    # and the entry just before it is where that line begins.
    [index, value - offsets[index - 1]]
  else
    # The value is at or past the final cached offset.
    [offsets.length - 1, value - offsets.last]
  end
end
end end
# The constant that wraps the behavior of the lexer to match Ripper's output # The constant that wraps the behavior of the lexer to match Ripper's output
@ -746,4 +714,39 @@ module YARP
def self.lex_compat(source, filepath = "") def self.lex_compat(source, filepath = "")
LexCompat.new(source, filepath).result LexCompat.new(source, filepath).result
end end
# This lexes with the Ripper lex. It drops any space events but otherwise
# returns the same tokens. Raises SyntaxError if the syntax in source is
# invalid.
def self.lex_ripper(source)
  Ripper.lex(source, raise_errors: true).each_with_object([]) do |token, kept|
    event = token[1]
    next if event == :on_sp # whitespace events are dropped entirely

    # A token is folded into the previously kept one only when both carry the
    # same event and that event is one of the two mergeable kinds.
    tail = kept.last
    absorb =
      if tail.nil? || tail[1] != event
        false
      elsif event == :on_tstring_content
        token[2].start_with?("\#$", "\#@")
      else
        event == :on_words_sep
      end

    if absorb
      tail[2] << token[2]
    else
      kept << token
    end
  end
end
end end

File diff suppressed because it is too large Load Diff

View File

@ -9,26 +9,30 @@ require "stringio"
module YARP module YARP
module Serialize module Serialize
def self.load(source, serialized) def self.load(input, serialized)
io = StringIO.new(serialized) io = StringIO.new(serialized)
io.set_encoding(Encoding::BINARY) io.set_encoding(Encoding::BINARY)
Loader.new(source, serialized, io).load Loader.new(input, serialized, io).load
end end
class Loader class Loader
attr_reader :encoding, :source, :serialized, :io attr_reader :encoding, :input, :serialized, :io
attr_reader :constant_pool_offset, :constant_pool attr_reader :constant_pool_offset, :constant_pool, :source
def initialize(source, serialized, io) def initialize(input, serialized, io)
@encoding = Encoding::UTF_8 @encoding = Encoding::UTF_8
@source = source.dup @input = input.dup
@serialized = serialized @serialized = serialized
@io = io @io = io
@constant_pool_offset = nil @constant_pool_offset = nil
@constant_pool = nil @constant_pool = nil
offsets = [0]
input.b.scan("\n") { offsets << $~.end(0) }
@source = Source.new(input, offsets)
end end
def load def load
@ -36,7 +40,7 @@ module YARP
io.read(3).unpack("C3") => [0, 4, 0] io.read(3).unpack("C3") => [0, 4, 0]
@encoding = Encoding.find(io.read(load_varint)) @encoding = Encoding.find(io.read(load_varint))
@source = source.force_encoding(@encoding).freeze @input = input.force_encoding(@encoding).freeze
@constant_pool_offset = io.read(4).unpack1("L") @constant_pool_offset = io.read(4).unpack1("L")
@constant_pool = Array.new(load_varint, nil) @constant_pool = Array.new(load_varint, nil)
@ -78,7 +82,7 @@ module YARP
end end
def load_location def load_location
Location.new(load_varint, load_varint) Location.new(source, load_varint, load_varint)
end end
def load_optional_location def load_optional_location
@ -95,7 +99,7 @@ module YARP
start = serialized.unpack1("L", offset: offset) start = serialized.unpack1("L", offset: offset)
length = serialized.unpack1("L", offset: offset + 4) length = serialized.unpack1("L", offset: offset + 4)
constant = source.byteslice(start, length).to_sym constant = input.byteslice(start, length).to_sym
constant_pool[index] = constant constant_pool[index] = constant
end end
@ -104,262 +108,262 @@ module YARP
def load_node def load_node
type = io.getbyte type = io.getbyte
start_offset, length = load_varint, load_varint location = load_location
case type case type
when 1 then when 1 then
AliasNode.new(load_node, load_node, load_location, start_offset, length) AliasNode.new(load_node, load_node, load_location, location)
when 2 then when 2 then
AlternationPatternNode.new(load_node, load_node, load_location, start_offset, length) AlternationPatternNode.new(load_node, load_node, load_location, location)
when 3 then when 3 then
AndNode.new(load_node, load_node, load_location, start_offset, length) AndNode.new(load_node, load_node, load_location, location)
when 4 then when 4 then
ArgumentsNode.new(Array.new(load_varint) { load_node }, start_offset, length) ArgumentsNode.new(Array.new(load_varint) { load_node }, location)
when 5 then when 5 then
ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length) ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
when 6 then when 6 then
ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length) ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
when 7 then when 7 then
AssocNode.new(load_node, load_optional_node, load_optional_location, start_offset, length) AssocNode.new(load_node, load_optional_node, load_optional_location, location)
when 8 then when 8 then
AssocSplatNode.new(load_optional_node, load_location, start_offset, length) AssocSplatNode.new(load_optional_node, load_location, location)
when 9 then when 9 then
BackReferenceReadNode.new(start_offset, length) BackReferenceReadNode.new(location)
when 10 then when 10 then
BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length) BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, location)
when 11 then when 11 then
BlockArgumentNode.new(load_optional_node, load_location, start_offset, length) BlockArgumentNode.new(load_optional_node, load_location, location)
when 12 then when 12 then
BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, start_offset, length) BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, location)
when 13 then when 13 then
BlockParameterNode.new(load_optional_location, load_location, start_offset, length) BlockParameterNode.new(load_optional_location, load_location, location)
when 14 then when 14 then
BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, start_offset, length) BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, location)
when 15 then when 15 then
BreakNode.new(load_optional_node, load_location, start_offset, length) BreakNode.new(load_optional_node, load_location, location)
when 16 then when 16 then
CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, start_offset, length) CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, location)
when 17 then when 17 then
CallOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length) CallOperatorAndWriteNode.new(load_node, load_location, load_node, location)
when 18 then when 18 then
CallOperatorOrWriteNode.new(load_node, load_node, load_location, start_offset, length) CallOperatorOrWriteNode.new(load_node, load_node, load_location, location)
when 19 then when 19 then
CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length) CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
when 20 then when 20 then
CapturePatternNode.new(load_node, load_node, load_location, start_offset, length) CapturePatternNode.new(load_node, load_node, load_location, location)
when 21 then when 21 then
CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, start_offset, length) CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, location)
when 22 then when 22 then
ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, start_offset, length) ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, location)
when 23 then when 23 then
ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length) ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
when 24 then when 24 then
ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length) ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
when 25 then when 25 then
ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length) ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 26 then when 26 then
ClassVariableReadNode.new(start_offset, length) ClassVariableReadNode.new(location)
when 27 then when 27 then
ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length) ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
when 28 then when 28 then
ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length) ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, location)
when 29 then when 29 then
ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length) ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, location)
when 30 then when 30 then
ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length) ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 31 then when 31 then
ConstantPathNode.new(load_optional_node, load_node, load_location, start_offset, length) ConstantPathNode.new(load_optional_node, load_node, load_location, location)
when 32 then when 32 then
ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length) ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, location)
when 33 then when 33 then
ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, start_offset, length) ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, location)
when 34 then when 34 then
ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length) ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
when 35 then when 35 then
ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, start_offset, length) ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, location)
when 36 then when 36 then
ConstantReadNode.new(start_offset, length) ConstantReadNode.new(location)
when 37 then when 37 then
load_serialized_length load_serialized_length
DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, start_offset, length) DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, location)
when 38 then when 38 then
DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, start_offset, length) DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, location)
when 39 then when 39 then
ElseNode.new(load_location, load_optional_node, load_optional_location, start_offset, length) ElseNode.new(load_location, load_optional_node, load_optional_location, location)
when 40 then when 40 then
EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, start_offset, length) EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, location)
when 41 then when 41 then
EmbeddedVariableNode.new(load_location, load_node, start_offset, length) EmbeddedVariableNode.new(load_location, load_node, location)
when 42 then when 42 then
EnsureNode.new(load_location, load_optional_node, load_location, start_offset, length) EnsureNode.new(load_location, load_optional_node, load_location, location)
when 43 then when 43 then
FalseNode.new(start_offset, length) FalseNode.new(location)
when 44 then when 44 then
FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, start_offset, length) FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, location)
when 45 then when 45 then
FloatNode.new(start_offset, length) FloatNode.new(location)
when 46 then when 46 then
ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, start_offset, length) ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, location)
when 47 then when 47 then
ForwardingArgumentsNode.new(start_offset, length) ForwardingArgumentsNode.new(location)
when 48 then when 48 then
ForwardingParameterNode.new(start_offset, length) ForwardingParameterNode.new(location)
when 49 then when 49 then
ForwardingSuperNode.new(load_optional_node, start_offset, length) ForwardingSuperNode.new(load_optional_node, location)
when 50 then when 50 then
GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length) GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
when 51 then when 51 then
GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length) GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
when 52 then when 52 then
GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length) GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 53 then when 53 then
GlobalVariableReadNode.new(start_offset, length) GlobalVariableReadNode.new(location)
when 54 then when 54 then
GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, start_offset, length) GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, location)
when 55 then when 55 then
HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length) HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
when 56 then when 56 then
HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, start_offset, length) HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, location)
when 57 then when 57 then
IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length) IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
when 58 then when 58 then
ImaginaryNode.new(load_node, start_offset, length) ImaginaryNode.new(load_node, location)
when 59 then when 59 then
InNode.new(load_node, load_optional_node, load_location, load_optional_location, start_offset, length) InNode.new(load_node, load_optional_node, load_location, load_optional_location, location)
when 60 then when 60 then
InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length) InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
when 61 then when 61 then
InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length) InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
when 62 then when 62 then
InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length) InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 63 then when 63 then
InstanceVariableReadNode.new(start_offset, length) InstanceVariableReadNode.new(location)
when 64 then when 64 then
InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length) InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
when 65 then when 65 then
IntegerNode.new(start_offset, length) IntegerNode.new(location)
when 66 then when 66 then
InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, start_offset, length) InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, location)
when 67 then when 67 then
InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length) InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
when 68 then when 68 then
InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length) InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
when 69 then when 69 then
InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length) InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
when 70 then when 70 then
KeywordHashNode.new(Array.new(load_varint) { load_node }, start_offset, length) KeywordHashNode.new(Array.new(load_varint) { load_node }, location)
when 71 then when 71 then
KeywordParameterNode.new(load_location, load_optional_node, start_offset, length) KeywordParameterNode.new(load_location, load_optional_node, location)
when 72 then when 72 then
KeywordRestParameterNode.new(load_location, load_optional_location, start_offset, length) KeywordRestParameterNode.new(load_location, load_optional_location, location)
when 73 then when 73 then
LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, start_offset, length) LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, location)
when 74 then when 74 then
LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length) LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 75 then when 75 then
LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length) LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, location)
when 76 then when 76 then
LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, start_offset, length) LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, location)
when 77 then when 77 then
LocalVariableReadNode.new(load_constant, load_varint, start_offset, length) LocalVariableReadNode.new(load_constant, load_varint, location)
when 78 then when 78 then
LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, start_offset, length) LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, location)
when 79 then when 79 then
MatchPredicateNode.new(load_node, load_node, load_location, start_offset, length) MatchPredicateNode.new(load_node, load_node, load_location, location)
when 80 then when 80 then
MatchRequiredNode.new(load_node, load_node, load_location, start_offset, length) MatchRequiredNode.new(load_node, load_node, load_location, location)
when 81 then when 81 then
MissingNode.new(start_offset, length) MissingNode.new(location)
when 82 then when 82 then
ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, start_offset, length) ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, location)
when 83 then when 83 then
MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, start_offset, length) MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, location)
when 84 then when 84 then
NextNode.new(load_optional_node, load_location, start_offset, length) NextNode.new(load_optional_node, load_location, location)
when 85 then when 85 then
NilNode.new(start_offset, length) NilNode.new(location)
when 86 then when 86 then
NoKeywordsParameterNode.new(load_location, load_location, start_offset, length) NoKeywordsParameterNode.new(load_location, load_location, location)
when 87 then when 87 then
NumberedReferenceReadNode.new(start_offset, length) NumberedReferenceReadNode.new(location)
when 88 then when 88 then
OptionalParameterNode.new(load_constant, load_location, load_location, load_node, start_offset, length) OptionalParameterNode.new(load_constant, load_location, load_location, load_node, location)
when 89 then when 89 then
OrNode.new(load_node, load_node, load_location, start_offset, length) OrNode.new(load_node, load_node, load_location, location)
when 90 then when 90 then
ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, start_offset, length) ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, location)
when 91 then when 91 then
ParenthesesNode.new(load_optional_node, load_location, load_location, start_offset, length) ParenthesesNode.new(load_optional_node, load_location, load_location, location)
when 92 then when 92 then
PinnedExpressionNode.new(load_node, load_location, load_location, load_location, start_offset, length) PinnedExpressionNode.new(load_node, load_location, load_location, load_location, location)
when 93 then when 93 then
PinnedVariableNode.new(load_node, load_location, start_offset, length) PinnedVariableNode.new(load_node, load_location, location)
when 94 then when 94 then
PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length) PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
when 95 then when 95 then
PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length) PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
when 96 then when 96 then
ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, start_offset, length) ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, location)
when 97 then when 97 then
RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, start_offset, length) RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, location)
when 98 then when 98 then
RationalNode.new(load_node, start_offset, length) RationalNode.new(load_node, location)
when 99 then when 99 then
RedoNode.new(start_offset, length) RedoNode.new(location)
when 100 then when 100 then
RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, start_offset, length) RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, location)
when 101 then when 101 then
RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, start_offset, length) RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, location)
when 102 then when 102 then
RequiredParameterNode.new(load_constant, start_offset, length) RequiredParameterNode.new(load_constant, location)
when 103 then when 103 then
RescueModifierNode.new(load_node, load_location, load_node, start_offset, length) RescueModifierNode.new(load_node, load_location, load_node, location)
when 104 then when 104 then
RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, start_offset, length) RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, location)
when 105 then when 105 then
RestParameterNode.new(load_location, load_optional_location, start_offset, length) RestParameterNode.new(load_location, load_optional_location, location)
when 106 then when 106 then
RetryNode.new(start_offset, length) RetryNode.new(location)
when 107 then when 107 then
ReturnNode.new(load_location, load_optional_node, start_offset, length) ReturnNode.new(load_location, load_optional_node, location)
when 108 then when 108 then
SelfNode.new(start_offset, length) SelfNode.new(location)
when 109 then when 109 then
SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, start_offset, length) SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, location)
when 110 then when 110 then
SourceEncodingNode.new(start_offset, length) SourceEncodingNode.new(location)
when 111 then when 111 then
SourceFileNode.new(load_string, start_offset, length) SourceFileNode.new(load_string, location)
when 112 then when 112 then
SourceLineNode.new(start_offset, length) SourceLineNode.new(location)
when 113 then when 113 then
SplatNode.new(load_location, load_optional_node, start_offset, length) SplatNode.new(load_location, load_optional_node, location)
when 114 then when 114 then
StatementsNode.new(Array.new(load_varint) { load_node }, start_offset, length) StatementsNode.new(Array.new(load_varint) { load_node }, location)
when 115 then when 115 then
StringConcatNode.new(load_node, load_node, start_offset, length) StringConcatNode.new(load_node, load_node, location)
when 116 then when 116 then
StringNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length) StringNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
when 117 then when 117 then
SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, start_offset, length) SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, location)
when 118 then when 118 then
SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length) SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
when 119 then when 119 then
TrueNode.new(start_offset, length) TrueNode.new(location)
when 120 then when 120 then
UndefNode.new(Array.new(load_varint) { load_node }, load_location, start_offset, length) UndefNode.new(Array.new(load_varint) { load_node }, load_location, location)
when 121 then when 121 then
UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length) UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
when 122 then when 122 then
UntilNode.new(load_location, load_node, load_optional_node, start_offset, length) UntilNode.new(load_location, load_node, load_optional_node, location)
when 123 then when 123 then
WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, start_offset, length) WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, location)
when 124 then when 124 then
WhileNode.new(load_location, load_node, load_optional_node, start_offset, length) WhileNode.new(load_location, load_node, load_optional_node, location)
when 125 then when 125 then
XStringNode.new(load_location, load_location, load_location, load_string, start_offset, length) XStringNode.new(load_location, load_location, load_location, load_string, location)
when 126 then when 126 then
YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, start_offset, length) YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, location)
end end
end end
end end

View File

@ -1,212 +0,0 @@
# frozen_string_literal: true
require "yarp_test_helper"
# Compiles small snippets with both RubyVM::InstructionSequence and
# YARP.compile and compares the parts of the resulting instruction sequence
# arrays that YARP currently supports.
class CompileTest < Test::Unit::TestCase
  def test_AliasNode
    assert_compiles("alias foo bar")
  end

  def test_AndNode
    assert_compiles("true && false")
  end

  def test_ArrayNode
    ["[]", "[foo, bar, baz]"].each { |source| assert_compiles(source) }
  end

  def test_AssocNode
    assert_compiles("{ foo: bar }")
  end

  def test_BlockNode
    assert_compiles("foo { bar }")
  end

  def test_BlockNode_with_optionals
    assert_compiles("foo { |x = 1| bar }")
  end

  def test_CallNode
    ["foo", "foo(bar)"].each { |source| assert_compiles(source) }
  end

  def test_ClassVariableReadNode
    assert_compiles("@@foo")
  end

  def test_ClassVariableWriteNode
    assert_compiles("@@foo = 1")
  end

  def test_FalseNode
    assert_compiles("false")
  end

  def test_GlobalVariableReadNode
    assert_compiles("$foo")
  end

  def test_GlobalVariableWriteNode
    assert_compiles("$foo = 1")
  end

  def test_HashNode
    assert_compiles("{ foo: bar }")
  end

  def test_InstanceVariableReadNode
    assert_compiles("@foo")
  end

  def test_InstanceVariableWriteNode
    assert_compiles("@foo = 1")
  end

  def test_IntegerNode
    ["1", "1_000"].each { |source| assert_compiles(source) }
  end

  def test_InterpolatedStringNode
    assert_compiles('"foo #{bar} baz"')
  end

  def test_LocalVariableWriteNode
    assert_compiles("foo = 1")
  end

  def test_LocalVariableReadNode
    assert_compiles("[foo = 1, foo]")
  end

  def test_NilNode
    assert_compiles("nil")
  end

  def test_OrNode
    assert_compiles("true || false")
  end

  def test_ParenthesesNode
    assert_compiles("()")
  end

  def test_ProgramNode
    assert_compiles("")
  end

  def test_RangeNode
    %w[foo..bar foo...bar (foo..) (foo...) (..bar) (...bar)].each do |source|
      assert_compiles(source)
    end
  end

  def test_SelfNode
    assert_compiles("self")
  end

  def test_StringNode
    assert_compiles('"foo"')
  end

  def test_SymbolNode
    assert_compiles(":foo")
  end

  def test_TrueNode
    assert_compiles("true")
  end

  def test_UndefNode
    assert_compiles("undef :foo, :bar, :baz")
  end

  def test_XStringNode
    assert_compiles("`foo`")
  end

  private

  # Compiles the source with both compilers and compares the results.
  def assert_compiles(source)
    expected = rubyvm_compile(source)
    actual = YARP.compile(source)
    assert_equal_iseqs(expected, actual)
  end

  # Compares two instruction sequence arrays field by field. Not every field
  # is supported yet, so unsupported ones are only checked for their type.
  # Note that the instruction list of +actual+ is consumed while comparing.
  def assert_equal_iseqs(expected, actual)
    # Index 0: the magic comment string.
    assert_equal expected[0], actual[0]

    # Indices 1..3: major, minor, and patch version numbers.
    # TODO: Insert this check once Ruby 3.3 is released, and the TruffleRuby
    # GitHub workflow also checks against Ruby 3.3
    # assert_equal expected[1...4], actual[1...4]

    # Index 4: the options hash. Some entries must be equal, some only need
    # the right type, and some only need the same length.
    expected_options = expected[4]
    actual_options = actual[4]

    %i[arg_size stack_max].each do |key|
      assert_equal expected_options[key], actual_options[key], "Unexpected difference in #{key}"
    end

    %i[local_size node_id].each do |key|
      assert_kind_of Integer, actual_options[key]
    end

    %i[code_location node_ids].each do |key|
      assert_equal expected_options[key].length, actual_options[key].length, "Unexpected difference in #{key} length"
    end

    # Indices 5..8: iseq name, relative path, absolute path, and line number.
    # These are not working quite yet, so only their types are checked.
    (5..7).each { |index| assert_kind_of String, actual[index] }
    assert_kind_of Integer, actual[8]

    # Index 9: the type of the iseq.
    assert_equal expected[9], actual[9]

    # Index 10: the list of local variables (not supported yet).
    assert_kind_of Array, actual[10]

    # Index 11: the argument options used by block and method iseqs.
    assert_equal expected[11], actual[11], "Unexpected difference in argument options"

    # Index 12: the catch table entries (not working yet).
    assert_kind_of Array, actual[12]

    # Index 13: the instructions. Line numbers and some tracepoint events in
    # the expected list are skipped; everything else must line up with the
    # next instruction taken off the front of the actual list.
    remaining = actual[13]

    expected[13].each do |insn|
      case insn
      in [:send, opnds, expected_block] unless expected_block.nil?
        # A send with a block: compare the operands, then recurse into the
        # block's own instruction sequence.
        remaining.shift => [:send, ^opnds, actual_block]
        assert_equal_iseqs expected_block, actual_block
      in Array | :RUBY_EVENT_B_CALL | :RUBY_EVENT_B_RETURN | /^label_\d+/
        assert_equal insn, remaining.shift
      in Integer | /^RUBY_EVENT_/
        # skip these for now
      else
        flunk "Unexpected instruction: #{insn.inspect}"
      end
    end
  end

  # Compiles the source with the reference compiler, with the listed
  # optimizations switched off, and returns the array form of the iseq.
  def rubyvm_compile(source)
    iseq = RubyVM::InstructionSequence.compile(
      source,
      peephole_optimization: false,
      specialized_instruction: false,
      operands_unification: false,
      instructions_unification: false,
      frozen_string_literal: false
    )

    iseq.to_a
  end
end

View File

@ -55,6 +55,21 @@ class EncodingTest < Test::Unit::TestCase
assert_equal Encoding.find("utf-8"), actual assert_equal Encoding.find("utf-8"), actual
end end
# This test may be a little confusing. Basically when we use our strpbrk, it
# takes into account the encoding of the file. The source below declares
# itself as Shift_JIS and the test expects the two bytes inside the %w literal
# to come back intact as the unescaped value of the first element.
# NOTE(review): presumably the interesting part is that the second byte (0x5c)
# is the ASCII backslash and must not be read as an escape — confirm.
def test_strpbrk_multibyte
result = YARP.parse(<<~RUBY)
# encoding: Shift_JIS
%w[\x81\x5c]
RUBY
# Parsing must succeed before the element is inspected.
assert(result.errors.empty?)
assert_equal(
(+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
result.value.statements.body.first.elements.first.unescaped
)
end
def test_utf_8_variations def test_utf_8_variations
%w[ %w[
utf-8-unix utf-8-unix

View File

@ -18,3 +18,16 @@ not foo and
bar bar
not(foo
)
not(
foo
)

View File

@ -76,6 +76,7 @@ foo => Foo(*bar, baz, *qux)
foo => Foo[] foo => Foo[]
foo => Foo[1] foo => Foo[1]
foo => Foo[1, 2, 3] foo => Foo[1, 2, 3]
foo => Foo[Foo[]]
foo => Foo[bar] foo => Foo[bar]
foo => Foo[*bar, baz] foo => Foo[*bar, baz]
foo => Foo[bar, *baz] foo => Foo[bar, *baz]

View File

@ -1,363 +0,0 @@
# frozen_string_literal: true
require_relative "yarp_test_helper"
require "yarp/language_server"
module YARP
class LanguageServerTest < Test::Unit::TestCase
# Builds structural patterns out of plain arrays and hashes so that a whole
# list of server responses can be checked with a single `===`.
module Request
  # Represents a hash pattern. Every key in the pattern must match the
  # corresponding entry of the candidate; extra keys in the candidate are
  # ignored.
  class Shape
    attr_reader :values

    def initialize(values)
      @values = values
    end

    # Returns true when +other+ is a Hash whose entries match every pattern
    # entry. A pattern value of :any only requires the key to be present.
    # Anything that is not a Hash (including nil) never matches, so an empty
    # pattern no longer accepts arbitrary values and a scalar candidate fails
    # the match instead of raising NoMethodError.
    def ===(other)
      return false unless other.is_a?(Hash)

      values.all? do |key, value|
        value == :any ? other.key?(key) : value === other[key]
      end
    end
  end

  # Represents an array pattern. Elements are matched by position; elements
  # of the candidate beyond the end of the pattern are ignored.
  class Tuple
    attr_reader :values

    def initialize(values)
      @values = values
    end

    # Returns true when +other+ is an Array whose leading elements match the
    # pattern elements. Anything that is not an Array (including nil) never
    # matches, which also keeps an empty pattern from matching everything.
    def ===(other)
      return false unless other.is_a?(Array)

      values.each_with_index.all? { |value, index| value === other[index] }
    end
  end

  # Recursively converts arrays into Tuple patterns and hashes into Shape
  # patterns. Any other value is returned untouched and is later compared
  # with its own `===`.
  def self.[](value)
    case value
    when Array
      Tuple.new(value.map { |child| self[child] })
    when Hash
      Shape.new(value.transform_values { |child| self[child] })
    else
      value
    end
  end
end
# Request message for the "initialize" method, identified by +id+.
class Initialize < Struct.new(:id)
  # Hash form of the message: the method name followed by the request id.
  def to_hash
    message = { method: "initialize" }
    message[:id] = id
    message
  end
end
# Request message for the "shutdown" method, identified by +id+.
class Shutdown < Struct.new(:id)
  # Hash form of the message: the method name followed by the request id.
  def to_hash
    message = { method: "shutdown" }
    message[:id] = id
    message
  end
end
# Notification for "textDocument/didOpen", carrying the document's +uri+ and
# its full +text+.
class TextDocumentDidOpen < Struct.new(:uri, :text)
  # Hash form of the message. The struct members (uri, text) are exactly the
  # keys of the textDocument entry, so the struct's own hash is reused.
  def to_hash
    text_document = to_h

    { method: "textDocument/didOpen", params: { textDocument: text_document } }
  end
end
# Notification for "textDocument/didChange", carrying the document's +uri+
# and the replacement +text+.
class TextDocumentDidChange < Struct.new(:uri, :text)
  # Hash form of the message: the document is referenced by uri and the new
  # text is sent as the only entry of contentChanges.
  def to_hash
    text_document = { uri: uri }
    changes = [{ text: text }]

    {
      method: "textDocument/didChange",
      params: { textDocument: text_document, contentChanges: changes }
    }
  end
end
# Notification for "textDocument/didClose", carrying the document's +uri+.
class TextDocumentDidClose < Struct.new(:uri)
  # Hash form of the message. The only struct member is uri, which is exactly
  # the content of the textDocument entry.
  def to_hash
    text_document = to_h

    { method: "textDocument/didClose", params: { textDocument: text_document } }
  end
end
# Request message for "textDocument/codeAction": asks for code actions on the
# document at +uri+ given a list of +diagnostics+.
class TextDocumentCodeAction < Struct.new(:id, :uri, :diagnostics)
  # Hash form of the message: method, request id, then the params with the
  # document reference and the diagnostics wrapped in a context entry.
  def to_hash
    params = {
      textDocument: { uri: uri },
      context: { diagnostics: diagnostics }
    }

    { method: "textDocument/codeAction", id: id, params: params }
  end
end
# Request message for "textDocument/diagnostic": asks for the diagnostics of
# the document at +uri+.
class TextDocumentDiagnostic < Struct.new(:id, :uri)
  # Hash form of the message: method, request id, then the params holding the
  # document reference.
  def to_hash
    params = { textDocument: { uri: uri } }

    { method: "textDocument/diagnostic", id: id, params: params }
  end
end
# Runs an initialize/shutdown exchange while a temporary Ruby file exists on
# disk and checks that both requests are answered.
# NOTE(review): the temporary file is written but never referenced by any
# request sent to the server — confirm whether a request is missing here.
def test_reading_file
  Tempfile.create(%w[test- .rb]) do |file|
    file.write("class Foo; end")
    file.rewind

    requests = [Initialize.new(1), Shutdown.new(3)]
    responses = run_server(requests)

    expected = Request[[
      { id: 1, result: { capabilities: Hash } },
      { id: 3, result: {} }
    ]]
    assert_operator(expected, :===, responses)
  end
end
# An initialize request followed by a shutdown request must produce one
# response for each of the two request ids.
def test_clean_shutdown
  requests = [Initialize.new(1), Shutdown.new(2)]
  responses = run_server(requests)

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 2, result: {} }
  ]]
  assert_operator(expected, :===, responses)
end
# Runs an initialize/shutdown exchange and checks that both requests are
# answered.
# NOTE(review): despite the name, no request here refers to a file — confirm
# whether a request for a missing file was intended.
def test_file_that_does_not_exist
  requests = [Initialize.new(1), Shutdown.new(3)]
  responses = run_server(requests)

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 3, result: {} }
  ]]
  assert_operator(expected, :===, responses)
end
# Opens a document, requests code actions for one diagnostic, and expects a
# single quickfix whose command opens a link that contains the URL-encoded
# diagnostic message.
def test_code_action_request
  message = "this is an error"
  diagnostic = {
    range: {
      start: { line: 0, character: 0 },
      end: { line: 0, character: 0 }
    },
    message: message,
    severity: 1
  }

  uri = "file:///path/to/file.rb"
  source = <<~RUBY
    1 + (
  RUBY

  responses = run_server([
    Initialize.new(1),
    TextDocumentDidOpen.new(uri, source),
    TextDocumentCodeAction.new(2, uri, [diagnostic]),
    Shutdown.new(3)
  ])

  quickfix = {
    title: "Report incorrect error: `#{message}`",
    kind: "quickfix",
    diagnostics: [diagnostic],
    command: {
      title: "Report incorrect error",
      command: "vscode.open",
      arguments: [String]
    }
  }

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 2, result: [quickfix] },
    { id: 3, result: {} }
  ]]
  assert_operator(expected, :===, responses)

  # The first command argument of the first code action is the link.
  link = responses.dig(1, :result, 0, :command, :arguments, 0)
  assert(link.include?(URI.encode_www_form_component(message)))
end
# Requests code actions for an open document with an empty diagnostics list
# and expects an empty list of actions back.
def test_code_action_request_no_diagnostic
  uri = "file:///path/to/file.rb"
  source = <<~RUBY
    1 + (
  RUBY

  responses = run_server([
    Initialize.new(1),
    TextDocumentDidOpen.new(uri, source),
    TextDocumentCodeAction.new(2, uri, []),
    Shutdown.new(3)
  ])

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 2, result: [] },
    { id: 3, result: {} }
  ]]
  assert_operator(expected, :===, responses)
end
# Requests code actions for a document that was never opened and expects a
# nil result.
def test_code_action_request_no_content
  message = "this is an error"
  diagnostic = {
    range: {
      start: { line: 0, character: 0 },
      end: { line: 0, character: 0 }
    },
    message: message,
    severity: 1
  }

  responses = run_server([
    Initialize.new(1),
    TextDocumentCodeAction.new(2, "file:///path/to/file.rb", [diagnostic]),
    Shutdown.new(3)
  ])

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 2, result: nil },
    { id: 3, result: {} }
  ]]
  assert_operator(expected, :===, responses)
end
# Opens a document containing `1 + (`, requests its diagnostics, and expects
# a full report with at least one item of severity 1.
def test_diagnostics_request_error
  uri = "file:///path/to/file.rb"
  source = <<~RUBY
    1 + (
  RUBY

  responses = run_server([
    Initialize.new(1),
    TextDocumentDidOpen.new(uri, source),
    TextDocumentDiagnostic.new(2, uri),
    Shutdown.new(3)
  ])

  item = {
    range: {
      start: { line: Integer, character: Integer },
      end: { line: Integer, character: Integer }
    },
    message: String,
    severity: Integer
  }

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 2, result: { kind: "full", items: [item] } },
    { id: 3, result: {} }
  ]]
  assert_operator(expected, :===, responses)

  items = responses.dig(1, :result, :items)
  assert(items.count { |entry| entry[:severity] == 1 } > 0)
end
# Opens a document containing `a/b /c`, requests its diagnostics, and expects
# a full report with at least one item of severity 2.
def test_diagnostics_request_warning
  uri = "file:///path/to/file.rb"
  source = <<~RUBY
    a/b /c
  RUBY

  responses = run_server([
    Initialize.new(1),
    TextDocumentDidOpen.new(uri, source),
    TextDocumentDiagnostic.new(2, uri),
    Shutdown.new(3)
  ])

  item = {
    range: {
      start: { line: Integer, character: Integer },
      end: { line: Integer, character: Integer }
    },
    message: String,
    severity: Integer
  }

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 2, result: { kind: "full", items: [item] } },
    { id: 3, result: {} }
  ]]
  assert_operator(expected, :===, responses)

  items = responses.dig(1, :result, :items)
  assert(items.count { |entry| entry[:severity] == 2 } > 0)
end
# Opens a document containing `a = 1`, requests its diagnostics, and expects
# a full report with no items at all.
def test_diagnostics_request_nothing
  uri = "file:///path/to/file.rb"
  source = <<~RUBY
    a = 1
  RUBY

  responses = run_server([
    Initialize.new(1),
    TextDocumentDidOpen.new(uri, source),
    TextDocumentDiagnostic.new(2, uri),
    Shutdown.new(3)
  ])

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 2, result: { kind: "full", items: [] } },
    { id: 3, result: {} }
  ]]
  assert_operator(expected, :===, responses)

  # The empty array pattern above does not pin the length, so check it here.
  items = responses.dig(1, :result, :items)
  assert_equal(0, items.size)
end
# Requests diagnostics for a document that was never opened and expects a nil
# result.
def test_diagnostics_request_no_content
  requests = [
    Initialize.new(1),
    TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
    Shutdown.new(3)
  ]
  responses = run_server(requests)

  expected = Request[[
    { id: 1, result: { capabilities: Hash } },
    { id: 2, result: nil },
    { id: 3, result: {} }
  ]]
  assert_operator(expected, :===, responses)
end
private
# Serializes a message for the server: the message's hash form gets a
# jsonrpc version entry, is rendered as JSON, and is prefixed with a
# Content-Length header giving the size of the JSON body in bytes.
def write(content)
  payload = content.to_hash.merge(jsonrpc: "2.0")
  body = payload.to_json

  ["Content-Length: #{body.bytesize}", "", body].join("\r\n")
end
# Parses every message found in the given IO. Each message is a header block
# terminated by a blank line, followed by as many bytes of JSON as the
# Content-Length header announces. Returns the parsed messages, with symbol
# keys, in the order they were read.
def read(content)
  messages = []

  while (headers = content.gets("\r\n\r\n"))
    length = headers[/Content-Length: (\d+)/i, 1].to_i
    body = content.read(length)
    messages << JSON.parse(body, symbolize_names: true)
  end

  messages
end
# Feeds the given messages to a LanguageServer through in-memory IO objects,
# runs it, and returns every response it wrote, parsed into hashes.
def run_server(messages)
  serialized = messages.map { |message| write(message) }.join
  input = StringIO.new(serialized)
  output = StringIO.new

  server = LanguageServer.new(input: input, output: output)
  server.run

  # Go back to the start of the output so the responses can be read.
  output.rewind
  read(output)
end
end
end

View File

@ -3,10 +3,12 @@
require "yarp_test_helper" require "yarp_test_helper"
class ParseTest < Test::Unit::TestCase class ParseTest < Test::Unit::TestCase
# Because we're reading the snapshots from disk, we need to make sure that # When we pretty-print the trees to compare against the snapshots, we want to
# they're encoded as UTF-8. When certain settings are present this might not # be certain that we print with the same external encoding. This is because
# always be the case (e.g., LANG=C or -Eascii-8bit). So here we force the # methods like Symbol#inspect take into account external encoding and it could
# default external encoding for the duration of the test. # change how the snapshot is generated. On machines with certain settings
# (like LANG=C or -Eascii-8bit) this could have been changed. So here we're
# going to force it to be UTF-8 to keep the snapshots consistent.
def setup def setup
@previous_default_external = Encoding.default_external @previous_default_external = Encoding.default_external
ignore_warnings { Encoding.default_external = Encoding::UTF_8 } ignore_warnings { Encoding.default_external = Encoding::UTF_8 }
@ -29,20 +31,6 @@ class ParseTest < Test::Unit::TestCase
seattlerb/pct_w_heredoc_interp_nested.txt seattlerb/pct_w_heredoc_interp_nested.txt
] ]
# The filepath stored in a SourceFileNode differs from one machine to the
# next, so PP.pp(sexp, +"", 79) can give different results: both the path
# itself and the line breaks that depend on the length of the path. This
# collapses every printed SourceFileNode onto a single line and then removes
# this file's directory from the text.
def normalize_printed(printed)
  source_file_node = /SourceFileNode \s*
    \(\s* (\d+\.\.\.\d+) \s*\) \s*
    \(\s* ("[^"]*") \s*\)
  /mx

  collapsed = printed.gsub(source_file_node, 'SourceFileNode(\1)(\2)')
  collapsed.gsub(__dir__, "")
end
def find_source_file_node(node) def find_source_file_node(node)
if node.is_a?(YARP::SourceFileNode) if node.is_a?(YARP::SourceFileNode)
node node
@ -79,27 +67,26 @@ class ParseTest < Test::Unit::TestCase
# that is invalid Ruby. # that is invalid Ruby.
refute_nil Ripper.sexp_raw(source) refute_nil Ripper.sexp_raw(source)
# Next, parse the source and print the value.
result = YARP.parse_file(filepath)
value = result.value
printed = normalize_printed(PP.pp(value, +"", 79))
# Next, assert that there were no errors during parsing. # Next, assert that there were no errors during parsing.
assert_empty result.errors, value result = YARP.parse(source, relative)
assert_empty result.errors
# Next, pretty print the source.
printed = PP.pp(result.value, +"", 79)
if File.exist?(snapshot) if File.exist?(snapshot)
normalized = normalize_printed(File.read(snapshot)) saved = File.read(snapshot)
# If the snapshot file exists, but the printed value does not match the # If the snapshot file exists, but the printed value does not match the
# snapshot, then update the snapshot file. # snapshot, then update the snapshot file.
if normalized != printed if printed != saved
File.write(snapshot, normalized) File.write(snapshot, printed)
warn("Updated snapshot at #{snapshot}.") warn("Updated snapshot at #{snapshot}.")
end end
# If the snapshot file exists, then assert that the printed value # If the snapshot file exists, then assert that the printed value
# matches the snapshot. # matches the snapshot.
assert_equal(normalized, printed) assert_equal(saved, printed)
else else
# If the snapshot file does not yet exist, then write it out now. # If the snapshot file does not yet exist, then write it out now.
File.write(snapshot, printed) File.write(snapshot, printed)
@ -108,11 +95,11 @@ class ParseTest < Test::Unit::TestCase
# Next, assert that the value can be serialized and deserialized without # Next, assert that the value can be serialized and deserialized without
# changing the shape of the tree. # changing the shape of the tree.
assert_equal_nodes(value, YARP.load(source, YARP.dump(source, filepath))) assert_equal_nodes(result.value, YARP.load(source, YARP.dump(source, relative)))
# Next, assert that the newlines are in the expected places. # Next, assert that the newlines are in the expected places.
expected_newlines = [0] expected_newlines = [0]
source.b.scan("\n") { expected_newlines << $~.offset(0)[0] } source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
assert_equal expected_newlines, YARP.newlines(source) assert_equal expected_newlines, YARP.newlines(source)
# Finally, assert that we can lex the source and get the same tokens as # Finally, assert that we can lex the source and get the same tokens as

View File

@ -101,6 +101,10 @@ class RegexpTest < Test::Unit::TestCase
refute_nil(YARP.named_captures("(?#foo)")) refute_nil(YARP.named_captures("(?#foo)"))
end end
# A regexp comment group whose text contains escaped closing parentheses
# must still yield a non-nil result from named_captures.
def test_comments_with_escaped_parentheses
  captures = YARP.named_captures("(?#foo\\)\\))")
  refute_nil(captures)
end
def test_non_capturing_groups def test_non_capturing_groups
refute_nil(YARP.named_captures("(?:foo)")) refute_nil(YARP.named_captures("(?:foo)"))
end end

View File

@ -102,7 +102,7 @@ ProgramNode(0...185)(
StringNode(123...129)((123...125), (125...128), (128...129), "abc"), StringNode(123...129)((123...125), (125...128), (128...129), "abc"),
DefNode(131...149)( DefNode(131...149)(
(144...145), (144...145),
SourceFileNode(135...143)("/fixtures/keyword_method_names.txt"), SourceFileNode(135...143)("keyword_method_names.txt"),
nil, nil,
nil, nil,
[], [],

View File

@ -5,7 +5,7 @@ ProgramNode(0...51)(
RetryNode(6...11)(), RetryNode(6...11)(),
SelfNode(13...17)(), SelfNode(13...17)(),
SourceEncodingNode(19...31)(), SourceEncodingNode(19...31)(),
SourceFileNode(33...41)("/fixtures/keywords.txt"), SourceFileNode(33...41)("keywords.txt"),
SourceLineNode(43...51)()] SourceLineNode(43...51)()]
) )
) )

View File

@ -1,6 +1,6 @@
ProgramNode(0...125)( ProgramNode(0...156)(
[], [],
StatementsNode(0...125)( StatementsNode(0...156)(
[AndNode(0...19)( [AndNode(0...19)(
CallNode(0...7)( CallNode(0...7)(
CallNode(4...7)(nil, nil, (4...7), nil, nil, nil, nil, 0, "foo"), CallNode(4...7)(nil, nil, (4...7), nil, nil, nil, nil, 0, "foo"),
@ -146,6 +146,48 @@ ProgramNode(0...125)(
"!" "!"
), ),
(108...111) (108...111)
),
CallNode(127...138)(
CallNode(131...134)(
nil,
nil,
(131...134),
nil,
nil,
nil,
nil,
0,
"foo"
),
nil,
(127...130),
(130...131),
nil,
(137...138),
nil,
0,
"!"
),
CallNode(140...156)(
CallNode(147...150)(
nil,
nil,
(147...150),
nil,
nil,
nil,
nil,
0,
"foo"
),
nil,
(140...143),
(143...144),
nil,
(155...156),
nil,
0,
"!"
)] )]
) )
) )

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@ ProgramNode(0...38)(
[], [],
StatementsNode(0...38)( StatementsNode(0...38)(
[SourceEncodingNode(0...12)(), [SourceEncodingNode(0...12)(),
SourceFileNode(13...21)("/fixtures/unparser/corpus/literal/pragma.txt"), SourceFileNode(13...21)("unparser/corpus/literal/pragma.txt"),
SourceLineNode(22...30)(), SourceLineNode(22...30)(),
CallNode(31...38)(nil, nil, (31...38), nil, nil, nil, nil, 0, "__dir__")] CallNode(31...38)(nil, nil, (31...38), nil, nil, nil, nil, 0, "__dir__")]
) )

View File

@ -3,7 +3,9 @@ ProgramNode(8...111)(
StatementsNode(8...111)( StatementsNode(8...111)(
[CaseNode(8...111)( [CaseNode(8...111)(
ArrayNode(13...51)( ArrayNode(13...51)(
[SourceFileNode(14...22)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"), [SourceFileNode(14...22)(
"whitequark/pattern_matching__FILE__LINE_literals.txt"
),
CallNode(24...36)( CallNode(24...36)(
SourceLineNode(24...32)(), SourceLineNode(24...32)(),
nil, nil,
@ -22,7 +24,9 @@ ProgramNode(8...111)(
[InNode(62...99)( [InNode(62...99)(
ArrayPatternNode(65...99)( ArrayPatternNode(65...99)(
nil, nil,
[SourceFileNode(66...74)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"), [SourceFileNode(66...74)(
"whitequark/pattern_matching__FILE__LINE_literals.txt"
),
SourceLineNode(76...84)(), SourceLineNode(76...84)(),
SourceEncodingNode(86...98)()], SourceEncodingNode(86...98)()],
nil, nil,

View File

@ -1,6 +1,6 @@
ProgramNode(0...8)( ProgramNode(0...8)(
[], [],
StatementsNode(0...8)( StatementsNode(0...8)(
[SourceFileNode(0...8)("/fixtures/whitequark/string___FILE__.txt")] [SourceFileNode(0...8)("whitequark/string___FILE__.txt")]
) )
) )

File diff suppressed because it is too large Load Diff

View File

@ -9,14 +9,13 @@
#define YARP_AST_H #define YARP_AST_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include "yarp/util/yp_constant_pool.h"
#include "yarp/util/yp_string.h"
#include <assert.h> #include <assert.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include "yarp/util/yp_constant_pool.h"
#include "yarp/util/yp_string.h"
// This enum represents every type of token in the Ruby source. // This enum represents every type of token in the Ruby source.
typedef enum yp_token_type { typedef enum yp_token_type {
YP_TOKEN_EOF = 1, // final token in the file YP_TOKEN_EOF = 1, // final token in the file

View File

@ -1,826 +0,0 @@
#include "yarp/extension.h"
// The kinds of instruction sequence the compiler can describe. yp_iseq_new
// turns these into the "top" and "block" type symbols respectively.
typedef enum {
YP_ISEQ_TYPE_TOP,
YP_ISEQ_TYPE_BLOCK
} yp_iseq_type_t;
// Event kinds that the compiler can refer to.
// NOTE(review): presumably these correspond to the RUBY_EVENT_B_CALL and
// RUBY_EVENT_B_RETURN markers of an instruction list; the code that uses
// them is not in this part of the file — confirm.
typedef enum {
YP_RUBY_EVENT_B_CALL,
YP_RUBY_EVENT_B_RETURN
} yp_ruby_event_t;
// The state carried while compiling a single instruction sequence. Compilers
// form a chain through their parent pointers; yp_iseq_compiler_init fills in
// every field and yp_iseq_new reads them back to build the final Ruby array.
typedef struct yp_iseq_compiler {
// This is the parent compiler. It is used to communicate between ISEQs that
// need to be able to jump back to the parent ISEQ.
struct yp_iseq_compiler *parent;
// This is the list of local variables that are defined on this scope.
yp_constant_id_list_t *locals;
// This is the instruction sequence that we are compiling. It's actually just
// a Ruby array that maps to the output of RubyVM::InstructionSequence#to_a.
VALUE insns;
// This is a list of IDs coming from the instructions that are being compiled.
// In theory they should be deterministic, but we don't have that
// functionality yet. Fortunately you can pass -1 for all of them and
// everything for the most part continues to work.
VALUE node_ids;
// This is the current size of the instruction sequence's stack.
int stack_size;
// This is the maximum size of the instruction sequence's stack.
int stack_max;
// This is the name of the instruction sequence.
const char *name;
// This is the type of the instruction sequence.
yp_iseq_type_t type;
// This is the optional argument information.
VALUE optionals;
// This is the number of arguments.
int arg_size;
// This is the current size of the instruction sequence's instructions and
// operands.
size_t size;
// This is the index of the current inline storage.
size_t inline_storage_index;
} yp_iseq_compiler_t;
// Initialize every field of the given compiler. The parent, locals, name and
// type come from the arguments; the instruction list, node id list and
// optionals hash start out as fresh empty Ruby objects; all counters start
// at zero.
static void
yp_iseq_compiler_init(yp_iseq_compiler_t *compiler, yp_iseq_compiler_t *parent, yp_constant_id_list_t *locals, const char *name, yp_iseq_type_t type) {
    // Values handed in by the caller.
    compiler->parent = parent;
    compiler->locals = locals;
    compiler->name = name;
    compiler->type = type;

    // Fresh Ruby containers that are filled in while compiling.
    compiler->insns = rb_ary_new();
    compiler->node_ids = rb_ary_new();
    compiler->optionals = rb_hash_new();

    // Counters and sizes.
    compiler->stack_size = 0;
    compiler->stack_max = 0;
    compiler->arg_size = 0;
    compiler->size = 0;
    compiler->inline_storage_index = 0;
}
/******************************************************************************/
/* Utilities */
/******************************************************************************/
// Narrow a size_t to an int, raising a Ruby RuntimeError when the value does
// not fit.
static inline int
sizet2int(size_t value) {
    const int fits = value <= (size_t) INT_MAX;

    if (!fits) {
        rb_raise(rb_eRuntimeError, "value too large");
    }

    return (int) value;
}
// Look up a local variable in the scope that is `depth` parents above the
// given compiler. Returns the value computed from the variable's position in
// that scope's locals list, or -1 when the scope does not define it.
static int
local_index(yp_iseq_compiler_t *compiler, yp_constant_id_t constant_id, int depth) {
    // Walk up the parent chain to the requested scope.
    yp_iseq_compiler_t *scope = compiler;
    for (int remaining = depth; remaining > 0; remaining--) {
        scope = scope->parent;
        assert(scope != NULL);
    }

    yp_constant_id_list_t *locals = scope->locals;
    for (size_t index = 0; index < locals->size; index++) {
        if (locals->ids[index] != constant_id) continue;

        // The result counts down from the end of the list with an offset of
        // 2. NOTE(review): the reason for the offset is not visible here —
        // confirm against the consumer of this index.
        return sizet2int(locals->size - index + 2);
    }

    return -1;
}
/******************************************************************************/
/* Parse specific VALUEs from strings */
/******************************************************************************/
// Convert the numeric text in [start, end) into a Ruby Integer.
//
// The bytes are copied into a temporary Ruby string instead of an alloca'd
// buffer, so the length of the literal (which comes straight from the source
// being compiled) can no longer exhaust the C stack. The base argument of -10
// and the disabled bad-input check are the same ones that were previously
// handed to rb_cstr_to_inum.
static VALUE
parse_number(const char *start, const char *end) {
    VALUE literal = rb_str_new(start, (long) (end - start));
    return rb_str_to_inum(literal, -10, Qfalse);
}
// Build a new Ruby String from the source and length of the given yp_string_t.
static inline VALUE
parse_string(yp_string_t *string) {
    const char *source = yp_string_source(string);

    return rb_str_new(source, yp_string_length(string));
}
// Intern the bytes in [start, end) and return the resulting ID.
static inline ID
parse_symbol(const char *start, const char *end) {
    ptrdiff_t length = end - start;

    return rb_intern2(start, length);
}
// Intern the source text covered by the given location.
static inline ID
parse_location_symbol(yp_location_t *location) {
    const char *start = location->start;
    const char *end = location->end;

    return parse_symbol(start, end);
}
// Intern the source text covered by the given node's location.
static inline ID
parse_node_symbol(yp_node_t *node) {
    const char *start = node->location.start;
    const char *end = node->location.end;

    return parse_symbol(start, end);
}
// Intern the contents of the given yp_string_t.
static inline ID
parse_string_symbol(yp_string_t *string) {
    const char *start = yp_string_source(string);
    const char *end = start + yp_string_length(string);

    return parse_symbol(start, end);
}
/******************************************************************************/
/* Create Ruby objects for compilation */
/******************************************************************************/
// Build the Ruby array describing the instruction sequence held by the given
// compiler. The array has 14 entries, in this order: the format string, three
// version numbers, the data hash, the iseq name, two path strings, a line
// number, the type symbol, an empty array, the optionals hash, another empty
// array, and finally the instruction list.
static VALUE
yp_iseq_new(yp_iseq_compiler_t *compiler) {
    // The code location is a fixed placeholder of four integers.
    VALUE code_location = rb_ary_new_capa(4);
    rb_ary_push(code_location, INT2FIX(1));
    rb_ary_push(code_location, INT2FIX(0));
    rb_ary_push(code_location, INT2FIX(1));
    rb_ary_push(code_location, INT2FIX(0));

    // The data hash. local_size and node_id are fixed placeholders (0 and
    // -1); the remaining entries come from the compiler.
    VALUE data = rb_hash_new();
    rb_hash_aset(data, ID2SYM(rb_intern("arg_size")), INT2FIX(compiler->arg_size));
    rb_hash_aset(data, ID2SYM(rb_intern("local_size")), INT2FIX(0));
    rb_hash_aset(data, ID2SYM(rb_intern("stack_max")), INT2FIX(compiler->stack_max));
    rb_hash_aset(data, ID2SYM(rb_intern("node_id")), INT2FIX(-1));
    rb_hash_aset(data, ID2SYM(rb_intern("code_location")), code_location);
    rb_hash_aset(data, ID2SYM(rb_intern("node_ids")), compiler->node_ids);

    // Translate the compiler's type into its symbol. The value stays nil if
    // the type is not one of the known enum members.
    VALUE type = Qnil;
    switch (compiler->type) {
        case YP_ISEQ_TYPE_TOP:
            type = ID2SYM(rb_intern("top"));
            break;
        case YP_ISEQ_TYPE_BLOCK:
            type = ID2SYM(rb_intern("block"));
            break;
    }

    // 14 entries are pushed below, so reserve room for all of them up front
    // (the capacity used to be 13, one short of what is pushed).
    VALUE iseq = rb_ary_new_capa(14);
    rb_ary_push(iseq, rb_str_new_cstr("YARVInstructionSequence/SimpleDataFormat"));
    rb_ary_push(iseq, INT2FIX(3));
    rb_ary_push(iseq, INT2FIX(3));
    rb_ary_push(iseq, INT2FIX(1));
    rb_ary_push(iseq, data);
    rb_ary_push(iseq, rb_str_new_cstr(compiler->name));
    rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));
    rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));
    rb_ary_push(iseq, INT2FIX(1));
    rb_ary_push(iseq, type);
    rb_ary_push(iseq, rb_ary_new());
    rb_ary_push(iseq, compiler->optionals);
    rb_ary_push(iseq, rb_ary_new());
    rb_ary_push(iseq, compiler->insns);

    return iseq;
}
// Bit flags that are OR-ed together and stored under the :flag key of the
// hashes built by yp_calldata_new. Each flag occupies a distinct bit. The
// commented-out entries are not referenced by the compiler yet; they are kept
// so the bit positions stay documented.
// NOTE(review): the values presumably mirror CRuby's VM_CALL_* flag bits --
// confirm against vm_callinfo.h before enabling any of them.
// static const int YP_CALLDATA_ARGS_SPLAT = 1 << 0;
// static const int YP_CALLDATA_ARGS_BLOCKARG = 1 << 1;
static const int YP_CALLDATA_FCALL = 1 << 2;
static const int YP_CALLDATA_VCALL = 1 << 3;
static const int YP_CALLDATA_ARGS_SIMPLE = 1 << 4;
// static const int YP_CALLDATA_BLOCKISEQ = 1 << 5;
// static const int YP_CALLDATA_KWARG = 1 << 6;
// static const int YP_CALLDATA_KW_SPLAT = 1 << 7;
// static const int YP_CALLDATA_TAILCALL = 1 << 8;
// static const int YP_CALLDATA_SUPER = 1 << 9;
// static const int YP_CALLDATA_ZSUPER = 1 << 10;
// static const int YP_CALLDATA_OPT_SEND = 1 << 11;
// static const int YP_CALLDATA_KW_SPLAT_MUT = 1 << 12;
// Build the call data hash for a method call: the method id under :mid, the
// YP_CALLDATA_* flag bits under :flag and the argument count under
// :orig_argc.
static VALUE
yp_calldata_new(ID mid, int flag, size_t orig_argc) {
    VALUE mid_value = ID2SYM(mid);
    VALUE flag_value = INT2FIX(flag);
    VALUE argc_value = INT2FIX(orig_argc);

    VALUE calldata = rb_hash_new();
    rb_hash_aset(calldata, ID2SYM(rb_intern("mid")), mid_value);
    rb_hash_aset(calldata, ID2SYM(rb_intern("flag")), flag_value);
    rb_hash_aset(calldata, ID2SYM(rb_intern("orig_argc")), argc_value);
    return calldata;
}
// Hand out the compiler's current inline storage index as a Fixnum and
// advance the counter so the next caller receives a fresh index.
static inline VALUE
yp_inline_storage_new(yp_iseq_compiler_t *compiler) {
    VALUE index = INT2FIX(compiler->inline_storage_index);
    compiler->inline_storage_index++;
    return index;
}
/******************************************************************************/
/* Push instructions onto a compiler */
/******************************************************************************/
// Append one instruction to the compiler and return it.
//
// The `size` variadic arguments (the opcode symbol followed by its operands,
// all VALUEs) are collected into a new array. `stack_change` is added to the
// tracked stack size and stack_max is raised if a new high-water mark is
// reached. A node id of -1 is recorded alongside the instruction.
static VALUE
push_insn(yp_iseq_compiler_t *compiler, int stack_change, size_t size, ...) {
    va_list opnds;
    va_start(opnds, size);

    VALUE insn = rb_ary_new_capa(size);
    for (size_t remaining = size; remaining > 0; remaining--) {
        VALUE opnd = va_arg(opnds, VALUE);
        rb_ary_push(insn, opnd);
    }

    va_end(opnds);

    // Track the running stack depth and remember its maximum.
    compiler->stack_size += stack_change;
    if (compiler->stack_max < compiler->stack_size) {
        compiler->stack_max = compiler->stack_size;
    }

    // The opcode and each operand count towards the compiler's size, which
    // push_label uses to name labels.
    compiler->size += size;

    rb_ary_push(compiler->insns, insn);
    rb_ary_push(compiler->node_ids, INT2FIX(-1));
    return insn;
}
// Append a label symbol named after the compiler's current size
// ("label_<size>") to the instruction list and return it.
static VALUE
push_label(yp_iseq_compiler_t *compiler) {
    VALUE name = rb_sprintf("label_%zu", compiler->size);
    ID label_id = rb_intern_str(name);
    VALUE label = ID2SYM(label_id);

    rb_ary_push(compiler->insns, label);
    return label;
}
// Append the symbol naming the given event to the instruction list. Events
// other than B_CALL and B_RETURN append nothing.
static void
push_ruby_event(yp_iseq_compiler_t *compiler, yp_ruby_event_t event) {
    if (event == YP_RUBY_EVENT_B_CALL) {
        rb_ary_push(compiler->insns, ID2SYM(rb_intern("RUBY_EVENT_B_CALL")));
    } else if (event == YP_RUBY_EVENT_B_RETURN) {
        rb_ary_push(compiler->insns, ID2SYM(rb_intern("RUBY_EVENT_B_RETURN")));
    }
}
// Emit `anytostring`, accounted as two values consumed and one produced.
static inline VALUE
push_anytostring(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("anytostring"));
    int stack_change = -2 + 1;
    return push_insn(compiler, stack_change, 1, opcode);
}
// Emit `branchif` targeting the given label, accounted as one value consumed.
// The label operand may be patched later by the caller.
static inline VALUE
push_branchif(yp_iseq_compiler_t *compiler, VALUE label) {
    VALUE opcode = ID2SYM(rb_intern("branchif"));
    int stack_change = -1 + 0;
    return push_insn(compiler, stack_change, 2, opcode, label);
}
// Emit `branchunless` targeting the given label, accounted as one value
// consumed. The label operand may be patched later by the caller.
static inline VALUE
push_branchunless(yp_iseq_compiler_t *compiler, VALUE label) {
    VALUE opcode = ID2SYM(rb_intern("branchunless"));
    int stack_change = -1 + 0;
    return push_insn(compiler, stack_change, 2, opcode, label);
}
// Emit `concatstrings` over `count` values, accounted as `count` values
// consumed and one produced.
static inline VALUE
push_concatstrings(yp_iseq_compiler_t *compiler, int count) {
    VALUE opcode = ID2SYM(rb_intern("concatstrings"));
    int stack_change = -count + 1;
    return push_insn(compiler, stack_change, 2, opcode, INT2FIX(count));
}
// Emit `dup`, accounted as one value consumed and two produced.
static inline VALUE
push_dup(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("dup"));
    int stack_change = -1 + 2;
    return push_insn(compiler, stack_change, 1, opcode);
}
// Emit `getclassvariable` for the given name and inline storage operand,
// accounted as one value produced.
static inline VALUE
push_getclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
    VALUE opcode = ID2SYM(rb_intern("getclassvariable"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 3, opcode, name, inline_storage);
}
// Emit `getconstant` for the given name, accounted as two values consumed and
// one produced.
static inline VALUE
push_getconstant(yp_iseq_compiler_t *compiler, VALUE name) {
    VALUE opcode = ID2SYM(rb_intern("getconstant"));
    int stack_change = -2 + 1;
    return push_insn(compiler, stack_change, 2, opcode, name);
}
// Emit `getglobal` for the given name, accounted as one value produced.
static inline VALUE
push_getglobal(yp_iseq_compiler_t *compiler, VALUE name) {
    VALUE opcode = ID2SYM(rb_intern("getglobal"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 2, opcode, name);
}
// Emit `getinstancevariable` for the given name and inline storage operand,
// accounted as one value produced.
static inline VALUE
push_getinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
    VALUE opcode = ID2SYM(rb_intern("getinstancevariable"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 3, opcode, name, inline_storage);
}
// Emit `getlocal` with the given index and depth operands, accounted as one
// value produced.
static inline VALUE
push_getlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
    VALUE opcode = ID2SYM(rb_intern("getlocal"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 3, opcode, index, depth);
}
// Emit `leave`, accounted as one value consumed.
static inline VALUE
push_leave(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("leave"));
    int stack_change = -1 + 0;
    return push_insn(compiler, stack_change, 1, opcode);
}
// Emit `newarray` over `count` values, accounted as `count` values consumed
// and one produced.
static inline VALUE
push_newarray(yp_iseq_compiler_t *compiler, int count) {
    VALUE opcode = ID2SYM(rb_intern("newarray"));
    int stack_change = -count + 1;
    return push_insn(compiler, stack_change, 2, opcode, INT2FIX(count));
}
// Emit `newhash` over `count` values, accounted as `count` values consumed
// and one produced.
static inline VALUE
push_newhash(yp_iseq_compiler_t *compiler, int count) {
    VALUE opcode = ID2SYM(rb_intern("newhash"));
    int stack_change = -count + 1;
    return push_insn(compiler, stack_change, 2, opcode, INT2FIX(count));
}
// Emit `newrange` with the given flag operand, accounted as two values
// consumed and one produced.
static inline VALUE
push_newrange(yp_iseq_compiler_t *compiler, VALUE flag) {
    VALUE opcode = ID2SYM(rb_intern("newrange"));
    int stack_change = -2 + 1;
    return push_insn(compiler, stack_change, 2, opcode, flag);
}
// Emit `nop`. The instruction neither consumes nor produces a value, so it is
// recorded with a net stack change of zero. Recording any other value would
// skew the compiler's running stack_size and, through it, the stack_max
// reported for the instruction sequence.
static inline VALUE
push_nop(yp_iseq_compiler_t *compiler) {
    return push_insn(compiler, -0 + 0, 1, ID2SYM(rb_intern("nop")));
}
// Emit `objtostring` with the given call data, accounted as one value
// consumed and one produced.
static inline VALUE
push_objtostring(yp_iseq_compiler_t *compiler, VALUE calldata) {
    VALUE opcode = ID2SYM(rb_intern("objtostring"));
    int stack_change = -1 + 1;
    return push_insn(compiler, stack_change, 2, opcode, calldata);
}
// Emit `pop`, accounted as one value consumed.
static inline VALUE
push_pop(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("pop"));
    int stack_change = -1 + 0;
    return push_insn(compiler, stack_change, 1, opcode);
}
// Emit `putnil`, accounted as one value produced.
static inline VALUE
push_putnil(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("putnil"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 1, opcode);
}
// Emit `putobject` with the given value operand, accounted as one value
// produced.
static inline VALUE
push_putobject(yp_iseq_compiler_t *compiler, VALUE value) {
    VALUE opcode = ID2SYM(rb_intern("putobject"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 2, opcode, value);
}
// Emit `putself`, accounted as one value produced.
static inline VALUE
push_putself(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("putself"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 1, opcode);
}
// Emit `setlocal` with the given index and depth operands, accounted as one
// value consumed.
static inline VALUE
push_setlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
    VALUE opcode = ID2SYM(rb_intern("setlocal"));
    int stack_change = -1 + 0;
    return push_insn(compiler, stack_change, 3, opcode, index, depth);
}
// Operand values accepted by push_putspecialobject, pre-wrapped as Fixnums.
// The compiler pushes VMCORE followed by CBASE before sending the
// core#set_method_alias and core#undef_method messages. CONST_BASE is not
// used yet and is kept commented out.
// NOTE(review): the numbers presumably match CRuby's
// vm_special_object_type enum -- confirm before adding entries.
static const VALUE YP_SPECIALOBJECT_VMCORE = INT2FIX(1);
static const VALUE YP_SPECIALOBJECT_CBASE = INT2FIX(2);
// static const VALUE YP_SPECIALOBJECT_CONST_BASE = INT2FIX(3);
// Emit `putspecialobject` with one of the YP_SPECIALOBJECT_* operands,
// accounted as one value produced.
static inline VALUE
push_putspecialobject(yp_iseq_compiler_t *compiler, VALUE object) {
    VALUE opcode = ID2SYM(rb_intern("putspecialobject"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 2, opcode, object);
}
// Emit `putstring` with the given string operand, accounted as one value
// produced.
static inline VALUE
push_putstring(yp_iseq_compiler_t *compiler, VALUE string) {
    VALUE opcode = ID2SYM(rb_intern("putstring"));
    int stack_change = -0 + 1;
    return push_insn(compiler, stack_change, 2, opcode, string);
}
// Emit `send` with the given call data and block iseq operands. The net stack
// effect depends on the call, so the caller supplies it as `stack_change`.
static inline VALUE
push_send(yp_iseq_compiler_t *compiler, int stack_change, VALUE calldata, VALUE block_iseq) {
    VALUE opcode = ID2SYM(rb_intern("send"));
    return push_insn(compiler, stack_change, 3, opcode, calldata, block_iseq);
}
// Emit `setclassvariable` for the given name and inline storage operand,
// accounted as one value consumed.
static inline VALUE
push_setclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
    VALUE opcode = ID2SYM(rb_intern("setclassvariable"));
    int stack_change = -1 + 0;
    return push_insn(compiler, stack_change, 3, opcode, name, inline_storage);
}
// Emit `setglobal` for the given name, accounted as one value consumed.
static inline VALUE
push_setglobal(yp_iseq_compiler_t *compiler, VALUE name) {
    VALUE opcode = ID2SYM(rb_intern("setglobal"));
    int stack_change = -1 + 0;
    return push_insn(compiler, stack_change, 2, opcode, name);
}
// Emit `setinstancevariable` for the given name and inline storage operand,
// accounted as one value consumed.
static inline VALUE
push_setinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
    VALUE opcode = ID2SYM(rb_intern("setinstancevariable"));
    int stack_change = -1 + 0;
    return push_insn(compiler, stack_change, 3, opcode, name, inline_storage);
}
/******************************************************************************/
/* Compile an AST node using the given compiler */
/******************************************************************************/
// Compile a single AST node by appending instructions to the given compiler
// through the push_* helpers. The function dispatches on base_node->type and
// recurses into child nodes. Node types without a case raise
// NotImplementedError via rb_raise.
static void
yp_compile_node(yp_iseq_compiler_t *compiler, yp_node_t *base_node) {
switch (base_node->type) {
// alias: push the VMCORE and CBASE special objects, then both names, and
// send core#set_method_alias with three arguments.
case YP_NODE_ALIAS_NODE: {
yp_alias_node_t *node = (yp_alias_node_t *) base_node;
push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
yp_compile_node(compiler, node->new_name);
yp_compile_node(compiler, node->old_name);
push_send(compiler, -3, yp_calldata_new(rb_intern("core#set_method_alias"), YP_CALLDATA_ARGS_SIMPLE, 3), Qnil);
return;
}
// and: compile the left side, duplicate it and emit a branchunless whose
// target is not known yet (Qnil placeholder). Then drop the duplicate and
// compile the right side. The branch operand is patched with the label that
// is pushed after the right side.
case YP_NODE_AND_NODE: {
yp_and_node_t *node = (yp_and_node_t *) base_node;
yp_compile_node(compiler, node->left);
push_dup(compiler);
VALUE branchunless = push_branchunless(compiler, Qnil);
push_pop(compiler);
yp_compile_node(compiler, node->right);
VALUE label = push_label(compiler);
rb_ary_store(branchunless, 1, label);
return;
}
// arguments: compile every argument in order.
case YP_NODE_ARGUMENTS_NODE: {
yp_arguments_node_t *node = (yp_arguments_node_t *) base_node;
yp_node_list_t node_list = node->arguments;
for (size_t index = 0; index < node_list.size; index++) {
yp_compile_node(compiler, node_list.nodes[index]);
}
return;
}
// array: compile every element, then collect them with newarray.
case YP_NODE_ARRAY_NODE: {
yp_array_node_t *node = (yp_array_node_t *) base_node;
yp_node_list_t elements = node->elements;
for (size_t index = 0; index < elements.size; index++) {
yp_compile_node(compiler, elements.nodes[index]);
}
push_newarray(compiler, sizet2int(elements.size));
return;
}
// assoc: compile the key followed by the value; the enclosing hash node
// emits the instruction that consumes them.
case YP_NODE_ASSOC_NODE: {
yp_assoc_node_t *node = (yp_assoc_node_t *) base_node;
yp_compile_node(compiler, node->key);
yp_compile_node(compiler, node->value);
return;
}
// block: when the block has optional parameters, a label is pushed before
// the code of each optional plus one final label after the last, and the
// list of labels is stored under :opt in compiler->optionals. The body (or
// nil when the block is empty) is wrapped in B_CALL / B_RETURN events and
// followed by leave.
case YP_NODE_BLOCK_NODE: {
yp_block_node_t *node = (yp_block_node_t *) base_node;
VALUE optional_labels = rb_ary_new();
if (node->parameters &&
node->parameters->parameters &&
node->parameters->parameters->optionals.size > 0) {
compiler->arg_size += node->parameters->parameters->optionals.size;
yp_node_list_t *optionals = &node->parameters->parameters->optionals;
for (size_t i = 0; i < optionals->size; i++) {
VALUE label = push_label(compiler);
rb_ary_push(optional_labels, label);
yp_compile_node(compiler, optionals->nodes[i]);
}
VALUE label = push_label(compiler);
rb_ary_push(optional_labels, label);
rb_hash_aset(compiler->optionals, ID2SYM(rb_intern("opt")), optional_labels);
push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
push_nop(compiler);
} else {
push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
}
if (node->statements) {
yp_compile_node(compiler, node->statements);
} else {
push_putnil(compiler);
}
push_ruby_event(compiler, YP_RUBY_EVENT_B_RETURN);
push_leave(compiler);
return;
}
// call: push the receiver (self when there is none), then the arguments,
// compile any attached block into its own iseq, and emit send.
case YP_NODE_CALL_NODE: {
yp_call_node_t *node = (yp_call_node_t *) base_node;
ID mid = parse_location_symbol(&node->message_loc);
int flags = 0;
size_t orig_argc;
if (node->receiver == NULL) {
push_putself(compiler);
} else {
yp_compile_node(compiler, node->receiver);
}
if (node->arguments == NULL) {
// NOTE(review): flags is still 0 at this point, so this check can never
// set VCALL; the VCALL bit is actually set further down. Looks like dead
// code -- confirm and remove.
if (flags & YP_CALLDATA_FCALL) flags |= YP_CALLDATA_VCALL;
orig_argc = 0;
} else {
yp_arguments_node_t *arguments = node->arguments;
yp_compile_node(compiler, (yp_node_t *) arguments);
orig_argc = arguments->arguments.size;
}
VALUE block_iseq = Qnil;
if (node->block != NULL) {
// The block is compiled with a separate compiler whose parent is the
// current one, and the resulting iseq becomes an operand of send.
yp_iseq_compiler_t block_compiler;
yp_iseq_compiler_init(
&block_compiler,
compiler,
&node->block->locals,
"block in <compiled>",
YP_ISEQ_TYPE_BLOCK
);
yp_compile_node(&block_compiler, (yp_node_t *) node->block);
block_iseq = yp_iseq_new(&block_compiler);
}
// ARGS_SIMPLE is only set for calls without a block.
if (block_iseq == Qnil && flags == 0) {
flags |= YP_CALLDATA_ARGS_SIMPLE;
}
// Calls without an explicit receiver are FCALLs; with neither arguments
// nor a block they are additionally VCALLs.
if (node->receiver == NULL) {
flags |= YP_CALLDATA_FCALL;
if (block_iseq == Qnil && node->arguments == NULL) {
flags |= YP_CALLDATA_VCALL;
}
}
// Net stack effect of -argc: the receiver and arguments are consumed and
// one result is produced.
push_send(compiler, -sizet2int(orig_argc), yp_calldata_new(mid, flags, orig_argc), block_iseq);
return;
}
// class variable read: the name is the node's own source text.
case YP_NODE_CLASS_VARIABLE_READ_NODE: {
yp_class_variable_read_node_t *node = (yp_class_variable_read_node_t *) base_node;
push_getclassvariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node)), yp_inline_storage_new(compiler));
return;
}
// class variable write: the value is duplicated before the set so that one
// copy remains on the stack afterwards.
case YP_NODE_CLASS_VARIABLE_WRITE_NODE: {
yp_class_variable_write_node_t *node = (yp_class_variable_write_node_t *) base_node;
if (node->value == NULL) {
rb_raise(rb_eNotImpError, "class variable write without value not implemented");
}
yp_compile_node(compiler, node->value);
push_dup(compiler);
push_setclassvariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
return;
}
// constant path: compile the parent, push false, then getconstant with the
// child's source text as the name.
case YP_NODE_CONSTANT_PATH_NODE: {
yp_constant_path_node_t *node = (yp_constant_path_node_t *) base_node;
yp_compile_node(compiler, node->parent);
push_putobject(compiler, Qfalse);
push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node->child)));
return;
}
// constant read: push nil and true, then getconstant with the node's source
// text as the name.
case YP_NODE_CONSTANT_READ_NODE:
push_putnil(compiler);
push_putobject(compiler, Qtrue);
push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)));
return;
// embedded statements: compile the wrapped statements.
case YP_NODE_EMBEDDED_STATEMENTS_NODE: {
yp_embedded_statements_node_t *node = (yp_embedded_statements_node_t *) base_node;
yp_compile_node(compiler, (yp_node_t *) node->statements);
return;
}
case YP_NODE_FALSE_NODE:
push_putobject(compiler, Qfalse);
return;
// global variable read: the name is the node's own source text.
case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
push_getglobal(compiler, ID2SYM(parse_location_symbol(&base_node->location)));
return;
// global variable write: same dup-then-set shape as the other writes.
case YP_NODE_GLOBAL_VARIABLE_WRITE_NODE: {
yp_global_variable_write_node_t *node = (yp_global_variable_write_node_t *) base_node;
if (node->value == NULL) {
rb_raise(rb_eNotImpError, "global variable write without value not implemented");
}
yp_compile_node(compiler, node->value);
push_dup(compiler);
push_setglobal(compiler, ID2SYM(parse_location_symbol(&node->name_loc)));
return;
}
// hash: compile every element, then newhash with twice the element count
// (assoc elements each push a key and a value).
case YP_NODE_HASH_NODE: {
yp_hash_node_t *node = (yp_hash_node_t *) base_node;
yp_node_list_t elements = node->elements;
for (size_t index = 0; index < elements.size; index++) {
yp_compile_node(compiler, elements.nodes[index]);
}
push_newhash(compiler, sizet2int(elements.size * 2));
return;
}
// instance variable read: the name is the node's own source text.
case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
push_getinstancevariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)), yp_inline_storage_new(compiler));
return;
// instance variable write: same dup-then-set shape as the other writes.
case YP_NODE_INSTANCE_VARIABLE_WRITE_NODE: {
yp_instance_variable_write_node_t *node = (yp_instance_variable_write_node_t *) base_node;
if (node->value == NULL) {
rb_raise(rb_eNotImpError, "instance variable write without value not implemented");
}
yp_compile_node(compiler, node->value);
push_dup(compiler);
push_setinstancevariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
return;
}
// integer: the literal's source text is parsed into a Ruby Integer.
case YP_NODE_INTEGER_NODE:
push_putobject(compiler, parse_number(base_node->location.start, base_node->location.end));
return;
// interpolated string: plain string parts are pushed as objects; every other
// part is compiled, duplicated and converted with objtostring followed by
// anytostring. All parts are then joined with concatstrings.
case YP_NODE_INTERPOLATED_STRING_NODE: {
yp_interpolated_string_node_t *node = (yp_interpolated_string_node_t *) base_node;
for (size_t index = 0; index < node->parts.size; index++) {
yp_node_t *part = node->parts.nodes[index];
switch (part->type) {
case YP_NODE_STRING_NODE: {
yp_string_node_t *string_node = (yp_string_node_t *) part;
push_putobject(compiler, parse_string(&string_node->unescaped));
break;
}
default:
yp_compile_node(compiler, part);
push_dup(compiler);
push_objtostring(compiler, yp_calldata_new(rb_intern("to_s"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 0));
push_anytostring(compiler);
break;
}
}
push_concatstrings(compiler, sizet2int(node->parts.size));
return;
}
// keyword hash: compiled exactly like a hash node.
case YP_NODE_KEYWORD_HASH_NODE: {
yp_keyword_hash_node_t *node = (yp_keyword_hash_node_t *) base_node;
yp_node_list_t elements = node->elements;
for (size_t index = 0; index < elements.size; index++) {
yp_compile_node(compiler, elements.nodes[index]);
}
push_newhash(compiler, sizet2int(elements.size * 2));
return;
}
// local variable read: the slot index is resolved through local_index.
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
yp_local_variable_read_node_t *node = (yp_local_variable_read_node_t *) base_node;
int index = local_index(compiler, node->constant_id, node->depth);
push_getlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
return;
}
// local variable write: same dup-then-set shape as the other writes.
case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
yp_local_variable_write_node_t *node = (yp_local_variable_write_node_t *) base_node;
if (node->value == NULL) {
rb_raise(rb_eNotImpError, "local variable write without value not implemented");
}
int index = local_index(compiler, node->constant_id, node->depth);
yp_compile_node(compiler, node->value);
push_dup(compiler);
push_setlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
return;
}
case YP_NODE_NIL_NODE:
push_putnil(compiler);
return;
// or: mirror image of the and case, using branchif with a placeholder
// operand that is patched once the trailing label exists.
case YP_NODE_OR_NODE: {
yp_or_node_t *node = (yp_or_node_t *) base_node;
yp_compile_node(compiler, node->left);
push_dup(compiler);
VALUE branchif = push_branchif(compiler, Qnil);
push_pop(compiler);
yp_compile_node(compiler, node->right);
VALUE label = push_label(compiler);
rb_ary_store(branchif, 1, label);
return;
}
// parentheses: empty parentheses evaluate to nil.
case YP_NODE_PARENTHESES_NODE: {
yp_parentheses_node_t *node = (yp_parentheses_node_t *) base_node;
if (node->statements == NULL) {
push_putnil(compiler);
} else {
yp_compile_node(compiler, node->statements);
}
return;
}
// program: an empty program evaluates to nil; either way it ends in leave.
case YP_NODE_PROGRAM_NODE: {
yp_program_node_t *node = (yp_program_node_t *) base_node;
if (node->statements->body.size == 0) {
push_putnil(compiler);
} else {
yp_compile_node(compiler, (yp_node_t *) node->statements);
}
push_leave(compiler);
return;
}
// range: a missing bound is pushed as nil. The newrange flag is 1 when the
// operator is three bytes long (presumably `...`) and 0 otherwise.
case YP_NODE_RANGE_NODE: {
yp_range_node_t *node = (yp_range_node_t *) base_node;
if (node->left == NULL) {
push_putnil(compiler);
} else {
yp_compile_node(compiler, node->left);
}
if (node->right == NULL) {
push_putnil(compiler);
} else {
yp_compile_node(compiler, node->right);
}
push_newrange(compiler, INT2FIX((node->operator_loc.end - node->operator_loc.start) == 3));
return;
}
case YP_NODE_SELF_NODE:
push_putself(compiler);
return;
// statements: compile each statement and pop the result of every statement
// except the last, whose value stays on the stack.
case YP_NODE_STATEMENTS_NODE: {
yp_statements_node_t *node = (yp_statements_node_t *) base_node;
yp_node_list_t node_list = node->body;
for (size_t index = 0; index < node_list.size; index++) {
yp_compile_node(compiler, node_list.nodes[index]);
if (index < node_list.size - 1) push_pop(compiler);
}
return;
}
// string: pushed with putstring using the unescaped contents.
case YP_NODE_STRING_NODE: {
yp_string_node_t *node = (yp_string_node_t *) base_node;
push_putstring(compiler, parse_string(&node->unescaped));
return;
}
// symbol: the unescaped contents are interned and pushed as an object.
case YP_NODE_SYMBOL_NODE: {
yp_symbol_node_t *node = (yp_symbol_node_t *) base_node;
push_putobject(compiler, ID2SYM(parse_string_symbol(&node->unescaped)));
return;
}
case YP_NODE_TRUE_NODE:
push_putobject(compiler, Qtrue);
return;
// undef: one core#undef_method send per name; the result of every send
// except the last is popped.
case YP_NODE_UNDEF_NODE: {
yp_undef_node_t *node = (yp_undef_node_t *) base_node;
for (size_t index = 0; index < node->names.size; index++) {
push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
yp_compile_node(compiler, node->names.nodes[index]);
push_send(compiler, -2, yp_calldata_new(rb_intern("core#undef_method"), YP_CALLDATA_ARGS_SIMPLE, 2), Qnil);
if (index < node->names.size - 1) push_pop(compiler);
}
return;
}
// xstring: sends ` to self with the unescaped contents as the sole argument.
case YP_NODE_X_STRING_NODE: {
yp_x_string_node_t *node = (yp_x_string_node_t *) base_node;
push_putself(compiler);
push_putobject(compiler, parse_string(&node->unescaped));
push_send(compiler, -1, yp_calldata_new(rb_intern("`"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 1), Qnil);
return;
}
// optional parameter: compile the default value and store it into the
// parameter's local at depth 0. Unlike the write cases there is no dup, so
// nothing is left on the stack.
// NOTE(review): this case ends in `break` while every other case uses
// `return`; both fall out of the function, but the inconsistency is worth
// tidying.
case YP_NODE_OPTIONAL_PARAMETER_NODE: {
yp_optional_parameter_node_t *node = (yp_optional_parameter_node_t *) base_node;
int depth = 0;
int index = local_index(compiler, node->constant_id, depth);
yp_compile_node(compiler, node->value);
push_setlocal(compiler, INT2FIX(index), INT2FIX(depth));
break;
}
// Any node type without a dedicated case is reported as not implemented.
default:
rb_raise(rb_eNotImpError, "node type %d not implemented", base_node->type);
return;
}
}
// Entry point of the compiler: turn the given program node into the array
// representation of a top-level instruction sequence. The node must be a
// program node (checked with assert).
VALUE
yp_compile(yp_node_t *node) {
    assert(node->type == YP_NODE_PROGRAM_NODE);
    yp_program_node_t *program = (yp_program_node_t *) node;

    // The top-level compiler has no parent and uses the program's locals.
    yp_iseq_compiler_t compiler;
    yp_iseq_compiler_init(&compiler, NULL, &program->locals, "<compiled>", YP_ISEQ_TYPE_TOP);

    yp_compile_node(&compiler, node);
    return yp_iseq_new(&compiler);
}

1
yarp/config.h Normal file
View File

@ -0,0 +1 @@
#include "ruby/config.h"

View File

@ -1,8 +1,20 @@
#ifndef YARP_DEFINES_H #ifndef YARP_DEFINES_H
#define YARP_DEFINES_H #define YARP_DEFINES_H
// This file should be included first by any *.h or *.c in YARP
#include "yarp/config.h"
#include <ctype.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
// YP_EXPORTED_FUNCTION // YP_EXPORTED_FUNCTION
#if defined(_WIN32) #if defined(YP_STATIC)
# define YP_EXPORTED_FUNCTION
#elif defined(_WIN32)
# define YP_EXPORTED_FUNCTION __declspec(dllexport) extern # define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
#else #else
# ifndef YP_EXPORTED_FUNCTION # ifndef YP_EXPORTED_FUNCTION
@ -16,9 +28,9 @@
// YP_ATTRIBUTE_UNUSED // YP_ATTRIBUTE_UNUSED
#if defined(__GNUC__) #if defined(__GNUC__)
# define YP_ATTRIBUTE_UNUSED __attribute__((unused)) # define YP_ATTRIBUTE_UNUSED __attribute__((unused))
#else #else
# define YP_ATTRIBUTE_UNUSED # define YP_ATTRIBUTE_UNUSED
#endif #endif
// inline // inline
@ -26,4 +38,13 @@
# define inline __inline # define inline __inline
#endif #endif
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
#if defined(HAVE_SNPRINTF)
// We use snprintf if it's available
# define yp_snprintf snprintf
#endif
#endif #endif

View File

@ -2,12 +2,11 @@
#define YARP_DIAGNOSTIC_H #define YARP_DIAGNOSTIC_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include "yarp/util/yp_list.h"
#include <stdbool.h> #include <stdbool.h>
#include <stdlib.h> #include <stdlib.h>
#include "yarp/util/yp_list.h"
// This struct represents a diagnostic found during parsing. // This struct represents a diagnostic found during parsing.
typedef struct { typedef struct {
yp_list_node_t node; yp_list_node_t node;

View File

@ -51,7 +51,8 @@ yp_encoding_t yp_encoding_ascii = {
.char_width = yp_encoding_ascii_char_width, .char_width = yp_encoding_ascii_char_width,
.alnum_char = yp_encoding_ascii_alnum_char, .alnum_char = yp_encoding_ascii_alnum_char,
.alpha_char = yp_encoding_ascii_alpha_char, .alpha_char = yp_encoding_ascii_alpha_char,
.isupper_char = yp_encoding_ascii_isupper_char .isupper_char = yp_encoding_ascii_isupper_char,
.multibyte = false
}; };
yp_encoding_t yp_encoding_ascii_8bit = { yp_encoding_t yp_encoding_ascii_8bit = {
@ -60,4 +61,5 @@ yp_encoding_t yp_encoding_ascii_8bit = {
.alnum_char = yp_encoding_ascii_alnum_char, .alnum_char = yp_encoding_ascii_alnum_char,
.alpha_char = yp_encoding_ascii_alpha_char, .alpha_char = yp_encoding_ascii_alpha_char,
.isupper_char = yp_encoding_ascii_isupper_char, .isupper_char = yp_encoding_ascii_isupper_char,
.multibyte = false
}; };

View File

@ -74,5 +74,6 @@ yp_encoding_t yp_encoding_big5 = {
.char_width = yp_encoding_big5_char_width, .char_width = yp_encoding_big5_char_width,
.alnum_char = yp_encoding_big5_alnum_char, .alnum_char = yp_encoding_big5_alnum_char,
.alpha_char = yp_encoding_big5_alpha_char, .alpha_char = yp_encoding_big5_alpha_char,
.isupper_char = yp_encoding_big5_isupper_char .isupper_char = yp_encoding_big5_isupper_char,
.multibyte = true
}; };

View File

@ -12,11 +12,28 @@
// Each callback should return the number of bytes, or 0 if the next bytes are // Each callback should return the number of bytes, or 0 if the next bytes are
// invalid for the encoding and type. // invalid for the encoding and type.
typedef struct { typedef struct {
const char *name; // Return the number of bytes that the next character takes if it is valid
// in the encoding.
size_t (*char_width)(const char *c); size_t (*char_width)(const char *c);
// Return the number of bytes that the next character takes if it is valid
// in the encoding and is alphabetical.
size_t (*alpha_char)(const char *c); size_t (*alpha_char)(const char *c);
// Return the number of bytes that the next character takes if it is valid
// in the encoding and is alphanumeric.
size_t (*alnum_char)(const char *c); size_t (*alnum_char)(const char *c);
// Return true if the next character is valid in the encoding and is an
// uppercase character.
bool (*isupper_char)(const char *c); bool (*isupper_char)(const char *c);
// The name of the encoding. This should correspond to a value that can be
// passed to Encoding.find in Ruby.
const char *name;
// Return true if the encoding is a multibyte encoding.
bool multibyte;
} yp_encoding_t; } yp_encoding_t;
// These bits define the location of each bit of metadata within the various // These bits define the location of each bit of metadata within the various

View File

@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_euc_jp = {
.char_width = yp_encoding_euc_jp_char_width, .char_width = yp_encoding_euc_jp_char_width,
.alnum_char = yp_encoding_euc_jp_alnum_char, .alnum_char = yp_encoding_euc_jp_alnum_char,
.alpha_char = yp_encoding_euc_jp_alpha_char, .alpha_char = yp_encoding_euc_jp_alpha_char,
.isupper_char = yp_encoding_euc_jp_isupper_char .isupper_char = yp_encoding_euc_jp_isupper_char,
.multibyte = true
}; };

View File

@ -80,5 +80,6 @@ yp_encoding_t yp_encoding_gbk = {
.char_width = yp_encoding_gbk_char_width, .char_width = yp_encoding_gbk_char_width,
.alnum_char = yp_encoding_gbk_alnum_char, .alnum_char = yp_encoding_gbk_alnum_char,
.alpha_char = yp_encoding_gbk_alpha_char, .alpha_char = yp_encoding_gbk_alpha_char,
.isupper_char = yp_encoding_gbk_isupper_char .isupper_char = yp_encoding_gbk_isupper_char,
.multibyte = true
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_1 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_1_alnum_char, .alnum_char = yp_encoding_iso_8859_1_alnum_char,
.alpha_char = yp_encoding_iso_8859_1_alpha_char, .alpha_char = yp_encoding_iso_8859_1_alpha_char,
.isupper_char = yp_encoding_iso_8859_1_isupper_char .isupper_char = yp_encoding_iso_8859_1_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_10 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_10_alnum_char, .alnum_char = yp_encoding_iso_8859_10_alnum_char,
.alpha_char = yp_encoding_iso_8859_10_alpha_char, .alpha_char = yp_encoding_iso_8859_10_alpha_char,
.isupper_char = yp_encoding_iso_8859_10_isupper_char .isupper_char = yp_encoding_iso_8859_10_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_11 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_11_alnum_char, .alnum_char = yp_encoding_iso_8859_11_alnum_char,
.alpha_char = yp_encoding_iso_8859_11_alpha_char, .alpha_char = yp_encoding_iso_8859_11_alpha_char,
.isupper_char = yp_encoding_iso_8859_11_isupper_char .isupper_char = yp_encoding_iso_8859_11_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_13 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_13_alnum_char, .alnum_char = yp_encoding_iso_8859_13_alnum_char,
.alpha_char = yp_encoding_iso_8859_13_alpha_char, .alpha_char = yp_encoding_iso_8859_13_alpha_char,
.isupper_char = yp_encoding_iso_8859_13_isupper_char .isupper_char = yp_encoding_iso_8859_13_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_14 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_14_alnum_char, .alnum_char = yp_encoding_iso_8859_14_alnum_char,
.alpha_char = yp_encoding_iso_8859_14_alpha_char, .alpha_char = yp_encoding_iso_8859_14_alpha_char,
.isupper_char = yp_encoding_iso_8859_14_isupper_char .isupper_char = yp_encoding_iso_8859_14_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_15 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_15_alnum_char, .alnum_char = yp_encoding_iso_8859_15_alnum_char,
.alpha_char = yp_encoding_iso_8859_15_alpha_char, .alpha_char = yp_encoding_iso_8859_15_alpha_char,
.isupper_char = yp_encoding_iso_8859_15_isupper_char .isupper_char = yp_encoding_iso_8859_15_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_16 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_16_alnum_char, .alnum_char = yp_encoding_iso_8859_16_alnum_char,
.alpha_char = yp_encoding_iso_8859_16_alpha_char, .alpha_char = yp_encoding_iso_8859_16_alpha_char,
.isupper_char = yp_encoding_iso_8859_16_isupper_char .isupper_char = yp_encoding_iso_8859_16_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_2 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_2_alnum_char, .alnum_char = yp_encoding_iso_8859_2_alnum_char,
.alpha_char = yp_encoding_iso_8859_2_alpha_char, .alpha_char = yp_encoding_iso_8859_2_alpha_char,
.isupper_char = yp_encoding_iso_8859_2_isupper_char .isupper_char = yp_encoding_iso_8859_2_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_3 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_3_alnum_char, .alnum_char = yp_encoding_iso_8859_3_alnum_char,
.alpha_char = yp_encoding_iso_8859_3_alpha_char, .alpha_char = yp_encoding_iso_8859_3_alpha_char,
.isupper_char = yp_encoding_iso_8859_3_isupper_char .isupper_char = yp_encoding_iso_8859_3_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_4 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_4_alnum_char, .alnum_char = yp_encoding_iso_8859_4_alnum_char,
.alpha_char = yp_encoding_iso_8859_4_alpha_char, .alpha_char = yp_encoding_iso_8859_4_alpha_char,
.isupper_char = yp_encoding_iso_8859_4_isupper_char .isupper_char = yp_encoding_iso_8859_4_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_5 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_5_alnum_char, .alnum_char = yp_encoding_iso_8859_5_alnum_char,
.alpha_char = yp_encoding_iso_8859_5_alpha_char, .alpha_char = yp_encoding_iso_8859_5_alpha_char,
.isupper_char = yp_encoding_iso_8859_5_isupper_char .isupper_char = yp_encoding_iso_8859_5_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_6 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_6_alnum_char, .alnum_char = yp_encoding_iso_8859_6_alnum_char,
.alpha_char = yp_encoding_iso_8859_6_alpha_char, .alpha_char = yp_encoding_iso_8859_6_alpha_char,
.isupper_char = yp_encoding_iso_8859_6_isupper_char .isupper_char = yp_encoding_iso_8859_6_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_7 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_7_alnum_char, .alnum_char = yp_encoding_iso_8859_7_alnum_char,
.alpha_char = yp_encoding_iso_8859_7_alpha_char, .alpha_char = yp_encoding_iso_8859_7_alpha_char,
.isupper_char = yp_encoding_iso_8859_7_isupper_char .isupper_char = yp_encoding_iso_8859_7_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_8 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_8_alnum_char, .alnum_char = yp_encoding_iso_8859_8_alnum_char,
.alpha_char = yp_encoding_iso_8859_8_alpha_char, .alpha_char = yp_encoding_iso_8859_8_alpha_char,
.isupper_char = yp_encoding_iso_8859_8_isupper_char .isupper_char = yp_encoding_iso_8859_8_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_9 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_iso_8859_9_alnum_char, .alnum_char = yp_encoding_iso_8859_9_alnum_char,
.alpha_char = yp_encoding_iso_8859_9_alpha_char, .alpha_char = yp_encoding_iso_8859_9_alpha_char,
.isupper_char = yp_encoding_iso_8859_9_isupper_char .isupper_char = yp_encoding_iso_8859_9_isupper_char,
.multibyte = false
}; };

View File

@ -51,5 +51,6 @@ yp_encoding_t yp_encoding_koi8_r = {
.char_width = yp_encoding_koi8_r_char_width, .char_width = yp_encoding_koi8_r_char_width,
.alnum_char = yp_encoding_koi8_r_alnum_char, .alnum_char = yp_encoding_koi8_r_alnum_char,
.alpha_char = yp_encoding_koi8_r_alpha_char, .alpha_char = yp_encoding_koi8_r_alpha_char,
.isupper_char = yp_encoding_koi8_r_isupper_char .isupper_char = yp_encoding_koi8_r_isupper_char,
.multibyte = false
}; };

View File

@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_shift_jis = {
.char_width = yp_encoding_shift_jis_char_width, .char_width = yp_encoding_shift_jis_char_width,
.alnum_char = yp_encoding_shift_jis_alnum_char, .alnum_char = yp_encoding_shift_jis_alnum_char,
.alpha_char = yp_encoding_shift_jis_alpha_char, .alpha_char = yp_encoding_shift_jis_alpha_char,
.isupper_char = yp_encoding_shift_jis_isupper_char .isupper_char = yp_encoding_shift_jis_isupper_char,
.multibyte = true
}; };

View File

@ -2230,7 +2230,7 @@ utf_8_codepoint(const unsigned char *c, size_t *width) {
codepoint = (state != 0) ? codepoint = (state != 0) ?
(byte & 0x3fu) | (codepoint << 6) : (byte & 0x3fu) | (codepoint << 6) :
(0xff >> type) & (byte); (0xffu >> type) & (byte);
state = utf_8_dfa[256 + (state * 16) + type]; state = utf_8_dfa[256 + (state * 16) + type];
if (!state) { if (!state) {
@ -2312,5 +2312,6 @@ yp_encoding_t yp_encoding_utf_8 = {
.char_width = yp_encoding_utf_8_char_width, .char_width = yp_encoding_utf_8_char_width,
.alnum_char = yp_encoding_utf_8_alnum_char, .alnum_char = yp_encoding_utf_8_alnum_char,
.alpha_char = yp_encoding_utf_8_alpha_char, .alpha_char = yp_encoding_utf_8_alpha_char,
.isupper_char = yp_encoding_utf_8_isupper_char .isupper_char = yp_encoding_utf_8_isupper_char,
.multibyte = true
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1251 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_windows_1251_alnum_char, .alnum_char = yp_encoding_windows_1251_alnum_char,
.alpha_char = yp_encoding_windows_1251_alpha_char, .alpha_char = yp_encoding_windows_1251_alpha_char,
.isupper_char = yp_encoding_windows_1251_isupper_char .isupper_char = yp_encoding_windows_1251_isupper_char,
.multibyte = false
}; };

View File

@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1252 = {
.char_width = yp_encoding_single_char_width, .char_width = yp_encoding_single_char_width,
.alnum_char = yp_encoding_windows_1252_alnum_char, .alnum_char = yp_encoding_windows_1252_alnum_char,
.alpha_char = yp_encoding_windows_1252_alpha_char, .alpha_char = yp_encoding_windows_1252_alpha_char,
.isupper_char = yp_encoding_windows_1252_isupper_char .isupper_char = yp_encoding_windows_1252_isupper_char,
.multibyte = false
}; };

View File

@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_windows_31j = {
.char_width = yp_encoding_windows_31j_char_width, .char_width = yp_encoding_windows_31j_char_width,
.alnum_char = yp_encoding_windows_31j_alnum_char, .alnum_char = yp_encoding_windows_31j_alnum_char,
.alpha_char = yp_encoding_windows_31j_alpha_char, .alpha_char = yp_encoding_windows_31j_alpha_char,
.isupper_char = yp_encoding_windows_31j_isupper_char .isupper_char = yp_encoding_windows_31j_isupper_char,
.multibyte = true
}; };

View File

@ -1,6 +1,7 @@
#include "yarp/extension.h" #include "yarp/extension.h"
VALUE rb_cYARP; VALUE rb_cYARP;
VALUE rb_cYARPSource;
VALUE rb_cYARPToken; VALUE rb_cYARPToken;
VALUE rb_cYARPLocation; VALUE rb_cYARPLocation;
@ -9,51 +10,97 @@ VALUE rb_cYARPParseError;
VALUE rb_cYARPParseWarning; VALUE rb_cYARPParseWarning;
VALUE rb_cYARPParseResult; VALUE rb_cYARPParseResult;
// Represents a source of Ruby code. It can either be coming from a file or a /******************************************************************************/
// string. If it's a file, it's going to mmap the contents of the file. If it's /* IO of Ruby code */
// a string it's going to just point to the contents of the string. /******************************************************************************/
// Represents an input of Ruby code. It can either be coming from a file or a
// string. If it's a file, we'll use demand paging to read the contents of the
// file into a string. If it's already a string, we'll reference it directly.
typedef struct { typedef struct {
enum { SOURCE_FILE, SOURCE_STRING } type;
const char *source; const char *source;
size_t size; size_t size;
} source_t; } input_t;
// Check if the given filepath is a string. If it's nil, then return NULL. If
// it's not a string, then raise a type error. Otherwise return the filepath as
// a C string.
static const char *
check_filepath(VALUE filepath) {
// If the filepath is nil, then we don't need to do anything.
if (NIL_P(filepath)) {
return NULL;
}
// Check if the filepath is a string. If it's not, then raise a type error.
if (!RB_TYPE_P(filepath, T_STRING)) {
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
}
// Otherwise, return the filepath as a C string.
return StringValueCStr(filepath);
}
// Read the file indicated by the filepath parameter into source and load its // Read the file indicated by the filepath parameter into source and load its
// contents and size into the given source_t. // contents and size into the given input_t.
//
// We want to use demand paging as much as possible in order to avoid having to
// read the entire file into memory (which could be detrimental to performance
// for large files). This means that if we're on Windows we'll use
// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
// `mmap`, and on other POSIX systems we'll use `read`.
static int static int
source_file_load(source_t *source, VALUE filepath) { input_load_filepath(input_t *input, const char *filepath) {
#ifdef _WIN32 #ifdef _WIN32
HANDLE file = CreateFile( // Open the file for reading.
StringValueCStr(filepath), HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
GENERIC_READ,
0,
NULL,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NULL
);
if (file == INVALID_HANDLE_VALUE) { if (file == INVALID_HANDLE_VALUE) {
perror("Invalid handle for file"); perror("CreateFile failed");
return 1; return 1;
} }
// Get the file size.
DWORD file_size = GetFileSize(file, NULL); DWORD file_size = GetFileSize(file, NULL);
source->source = malloc(file_size); if (file_size == INVALID_FILE_SIZE) {
CloseHandle(file);
perror("GetFileSize failed");
return 1;
}
DWORD bytes_read; // If the file is empty, then we don't need to do anything else, we'll set
BOOL success = ReadFile(file, DISCARD_CONST_QUAL(void *, source->source), file_size, &bytes_read, NULL); // the source to a constant empty string and return.
if (!file_size) {
CloseHandle(file);
input->size = 0;
input->source = "";
return 0;
}
// Create a mapping of the file.
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
if (mapping == NULL) {
CloseHandle(file);
perror("CreateFileMapping failed");
return 1;
}
// Map the file into memory.
input->source = (const char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
CloseHandle(mapping);
CloseHandle(file); CloseHandle(file);
if (!success) { if (input->source == NULL) {
perror("ReadFile failed"); perror("MapViewOfFile failed");
return 1; return 1;
} }
source->size = (size_t) file_size; // Set the size of the source.
input->size = (size_t) file_size;
return 0; return 0;
#else #else
// Open the file for reading // Open the file for reading
int fd = open(StringValueCStr(filepath), O_RDONLY); int fd = open(filepath, O_RDONLY);
if (fd == -1) { if (fd == -1) {
perror("open"); perror("open");
return 1; return 1;
@ -68,30 +115,30 @@ source_file_load(source_t *source, VALUE filepath) {
} }
// mmap the file descriptor to virtually get the contents // mmap the file descriptor to virtually get the contents
source->size = sb.st_size; input->size = sb.st_size;
#ifdef HAVE_MMAP #ifdef HAVE_MMAP
if (!source->size) { if (!input->size) {
close(fd); close(fd);
source->source = ""; input->source = "";
return 0; return 0;
} }
char * res = mmap(NULL, source->size, PROT_READ, MAP_PRIVATE, fd, 0); const char *result = mmap(NULL, input->size, PROT_READ, MAP_PRIVATE, fd, 0);
if (res == MAP_FAILED) { if (result == MAP_FAILED) {
perror("Map failed"); perror("Map failed");
return 1; return 1;
} else { } else {
source->source = res; input->source = result;
} }
#else #else
source->source = malloc(source->size); input->source = malloc(input->size);
if (source->source == NULL) return 1; if (input->source == NULL) return 1;
ssize_t read_size = read(fd, (void *)source->source, source->size); ssize_t read_size = read(fd, (void *) input->source, input->size);
if (read_size < 0 || (size_t)read_size != source->size) { if (read_size < 0 || (size_t)read_size != input->size) {
perror("Read size is incorrect"); perror("Read size is incorrect");
free((void *)source->source); free((void *) input->source);
return 1; return 1;
} }
#endif #endif
@ -101,86 +148,106 @@ source_file_load(source_t *source, VALUE filepath) {
#endif #endif
} }
// Load the contents and size of the given string into the given source_t. // Load the contents and size of the given string into the given input_t.
static void static void
source_string_load(source_t *source, VALUE string) { input_load_string(input_t *input, VALUE string) {
*source = (source_t) { // Check if the string is a string. If it's not, then raise a type error.
.type = SOURCE_STRING, if (!RB_TYPE_P(string, T_STRING)) {
.source = RSTRING_PTR(string), rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
.size = RSTRING_LEN(string), }
};
input->source = RSTRING_PTR(string);
input->size = RSTRING_LEN(string);
} }
// Free any resources associated with the given source_t. // Free any resources associated with the given input_t. This is the counterpart
// function to input_load_filepath. It will unmap the file if it was mapped, or
// free the memory if it was allocated.
static void static void
source_file_unload(source_t *source) { input_unload_filepath(input_t *input) {
#ifdef _WIN32 // We don't need to free anything with 0 sized files because we handle that
free((void *)source->source); // with a constant string instead.
if (!input->size) return;
void *memory = (void *) input->source;
#if defined(_WIN32)
UnmapViewOfFile(memory);
#elif defined(HAVE_MMAP)
munmap(memory, input->size);
#else #else
#ifdef HAVE_MMAP free(memory);
munmap((void *)source->source, source->size);
#else
free((void *)source->source);
#endif
#endif #endif
} }
// Dump the AST corresponding to the given source to a string. /******************************************************************************/
/* Serializing the AST */
/******************************************************************************/
// Dump the AST corresponding to the given input to a string.
static VALUE static VALUE
dump_source(source_t *source, const char *filepath) { dump_input(input_t *input, const char *filepath) {
yp_buffer_t buffer;
if (!yp_buffer_init(&buffer)) {
rb_raise(rb_eNoMemError, "failed to allocate memory");
}
yp_parser_t parser; yp_parser_t parser;
yp_parser_init(&parser, source->source, source->size, filepath); yp_parser_init(&parser, input->source, input->size, filepath);
yp_node_t *node = yp_parse(&parser); yp_node_t *node = yp_parse(&parser);
yp_buffer_t buffer;
if (!yp_buffer_init(&buffer)) rb_raise(rb_eNoMemError, "failed to allocate memory");
yp_serialize(&parser, node, &buffer); yp_serialize(&parser, node, &buffer);
VALUE dumped = rb_str_new(buffer.value, buffer.length);
VALUE result = rb_str_new(buffer.value, buffer.length);
yp_node_destroy(&parser, node); yp_node_destroy(&parser, node);
yp_buffer_free(&buffer); yp_buffer_free(&buffer);
yp_parser_free(&parser); yp_parser_free(&parser);
return dumped; return result;
} }
// Dump the AST corresponding to the given string to a string. // Dump the AST corresponding to the given string to a string.
static VALUE static VALUE
dump(VALUE self, VALUE string, VALUE filepath) { dump(int argc, VALUE *argv, VALUE self) {
source_t source; VALUE string;
source_string_load(&source, string); VALUE filepath;
char *str = NULL; rb_scan_args(argc, argv, "11", &string, &filepath);
if (filepath != Qnil) { input_t input;
str = StringValueCStr(filepath); input_load_string(&input, string);
} return dump_input(&input, check_filepath(filepath));
return dump_source(&source, str);
} }
// Dump the AST corresponding to the given file to a string. // Dump the AST corresponding to the given file to a string.
static VALUE static VALUE
dump_file(VALUE self, VALUE filepath) { dump_file(VALUE self, VALUE filepath) {
source_t source; input_t input;
if (source_file_load(&source, filepath) != 0) return Qnil;
const char *checked = check_filepath(filepath);
if (input_load_filepath(&input, checked) != 0) return Qnil;
VALUE value = dump_input(&input, checked);
input_unload_filepath(&input);
VALUE value = dump_source(&source, StringValueCStr(filepath));
source_file_unload(&source);
return value; return value;
} }
/******************************************************************************/
/* Extracting values for the parse result */
/******************************************************************************/
// Extract the comments out of the parser into an array. // Extract the comments out of the parser into an array.
static VALUE static VALUE
parser_comments(yp_parser_t *parser) { parser_comments(yp_parser_t *parser, VALUE source) {
VALUE comments = rb_ary_new(); VALUE comments = rb_ary_new();
yp_comment_t *comment;
for (comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) { for (yp_comment_t *comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
VALUE location_argv[] = { LONG2FIX(comment->start - parser->start), LONG2FIX(comment->end - parser->start) }; VALUE location_argv[] = {
source,
LONG2FIX(comment->start - parser->start),
LONG2FIX(comment->end - parser->start)
};
VALUE type; VALUE type;
switch (comment->type) { switch (comment->type) {
case YP_COMMENT_INLINE: case YP_COMMENT_INLINE:
type = ID2SYM(rb_intern("inline")); type = ID2SYM(rb_intern("inline"));
@ -196,7 +263,7 @@ parser_comments(yp_parser_t *parser) {
break; break;
} }
VALUE comment_argv[] = { type, rb_class_new_instance(2, location_argv, rb_cYARPLocation) }; VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cYARPLocation) };
rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment)); rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment));
} }
@ -205,19 +272,20 @@ parser_comments(yp_parser_t *parser) {
// Extract the errors out of the parser into an array. // Extract the errors out of the parser into an array.
static VALUE static VALUE
parser_errors(yp_parser_t *parser, rb_encoding *encoding) { parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
VALUE errors = rb_ary_new(); VALUE errors = rb_ary_new();
yp_diagnostic_t *error; yp_diagnostic_t *error;
for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) { for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) {
VALUE location_argv[] = { VALUE location_argv[] = {
source,
LONG2FIX(error->start - parser->start), LONG2FIX(error->start - parser->start),
LONG2FIX(error->end - parser->start) LONG2FIX(error->end - parser->start)
}; };
VALUE error_argv[] = { VALUE error_argv[] = {
rb_enc_str_new_cstr(error->message, encoding), rb_enc_str_new_cstr(error->message, encoding),
rb_class_new_instance(2, location_argv, rb_cYARPLocation) rb_class_new_instance(3, location_argv, rb_cYARPLocation)
}; };
rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError)); rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError));
@ -228,19 +296,20 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
// Extract the warnings out of the parser into an array. // Extract the warnings out of the parser into an array.
static VALUE static VALUE
parser_warnings(yp_parser_t *parser, rb_encoding *encoding) { parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
VALUE warnings = rb_ary_new(); VALUE warnings = rb_ary_new();
yp_diagnostic_t *warning; yp_diagnostic_t *warning;
for (warning = (yp_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (yp_diagnostic_t *) warning->node.next) { for (warning = (yp_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (yp_diagnostic_t *) warning->node.next) {
VALUE location_argv[] = { VALUE location_argv[] = {
source,
LONG2FIX(warning->start - parser->start), LONG2FIX(warning->start - parser->start),
LONG2FIX(warning->end - parser->start) LONG2FIX(warning->end - parser->start)
}; };
VALUE warning_argv[] = { VALUE warning_argv[] = {
rb_enc_str_new_cstr(warning->message, encoding), rb_enc_str_new_cstr(warning->message, encoding),
rb_class_new_instance(2, location_argv, rb_cYARPLocation) rb_class_new_instance(3, location_argv, rb_cYARPLocation)
}; };
rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning)); rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning));
@ -249,22 +318,36 @@ parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
return warnings; return warnings;
} }
/******************************************************************************/
/* Lexing Ruby code */
/******************************************************************************/
// This struct gets stored in the parser and passed in to the lex callback any
// time a new token is found. We use it to store the necessary information to
// initialize a Token instance.
typedef struct { typedef struct {
VALUE source;
VALUE tokens; VALUE tokens;
rb_encoding *encoding; rb_encoding *encoding;
} lex_data_t; } lex_data_t;
// This is passed as a callback to the parser. It gets called every time a new
// token is found. Once found, we initialize a new instance of Token and push it
// onto the tokens array.
static void static void
lex_token(void *data, yp_parser_t *parser, yp_token_t *token) { lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data; lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
VALUE yields = rb_ary_new_capa(2); VALUE yields = rb_ary_new_capa(2);
rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding)); rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
rb_ary_push(yields, INT2FIX(parser->lex_state)); rb_ary_push(yields, INT2FIX(parser->lex_state));
rb_ary_push(lex_data->tokens, yields); rb_ary_push(lex_data->tokens, yields);
} }
// This is called whenever the encoding changes based on the magic comment at
// the top of the file. We use it to update the encoding that we are using to
// create tokens.
static void static void
lex_encoding_changed_callback(yp_parser_t *parser) { lex_encoding_changed_callback(yp_parser_t *parser) {
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data; lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
@ -273,30 +356,42 @@ lex_encoding_changed_callback(yp_parser_t *parser) {
// Return an array of tokens corresponding to the given source. // Return an array of tokens corresponding to the given source.
static VALUE static VALUE
lex_source(source_t *source, char *filepath) { lex_input(input_t *input, const char *filepath) {
yp_parser_t parser; yp_parser_t parser;
yp_parser_init(&parser, source->source, source->size, filepath); yp_parser_init(&parser, input->source, input->size, filepath);
yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback); yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
VALUE offsets = rb_ary_new();
VALUE source_argv[] = { rb_str_new(input->source, input->size), offsets };
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
lex_data_t lex_data = { lex_data_t lex_data = {
.source = source,
.tokens = rb_ary_new(), .tokens = rb_ary_new(),
.encoding = rb_utf8_encoding() .encoding = rb_utf8_encoding()
}; };
void *data = (void *) &lex_data; lex_data_t *data = &lex_data;
yp_lex_callback_t lex_callback = (yp_lex_callback_t) { yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
.data = data, .data = (void *) data,
.callback = lex_token, .callback = lex_token,
}; };
parser.lex_callback = &lex_callback; parser.lex_callback = &lex_callback;
yp_node_t *node = yp_parse(&parser); yp_node_t *node = yp_parse(&parser);
// Here we need to update the source range to have the correct newline
// offsets. We do it here because we've already created the object and given
// it over to all of the tokens.
for (size_t index = 0; index < parser.newline_list.size; index++) {
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
}
VALUE result_argv[] = { VALUE result_argv[] = {
lex_data.tokens, lex_data.tokens,
parser_comments(&parser), parser_comments(&parser, source),
parser_errors(&parser, lex_data.encoding), parser_errors(&parser, lex_data.encoding, source),
parser_warnings(&parser, lex_data.encoding) parser_warnings(&parser, lex_data.encoding, source)
}; };
VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult); VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
@ -309,40 +404,49 @@ lex_source(source_t *source, char *filepath) {
// Return an array of tokens corresponding to the given string. // Return an array of tokens corresponding to the given string.
static VALUE static VALUE
lex(VALUE self, VALUE string, VALUE filepath) { lex(int argc, VALUE *argv, VALUE self) {
source_t source; VALUE string;
source_string_load(&source, string); VALUE filepath;
char *filepath_char = NULL; rb_scan_args(argc, argv, "11", &string, &filepath);
if (filepath) {
filepath_char = StringValueCStr(filepath); input_t input;
} input_load_string(&input, string);
return lex_source(&source, filepath_char); return lex_input(&input, check_filepath(filepath));
} }
// Return an array of tokens corresponding to the given file. // Return an array of tokens corresponding to the given file.
static VALUE static VALUE
lex_file(VALUE self, VALUE filepath) { lex_file(VALUE self, VALUE filepath) {
source_t source; input_t input;
if (source_file_load(&source, filepath) != 0) return Qnil;
const char *checked = check_filepath(filepath);
if (input_load_filepath(&input, checked) != 0) return Qnil;
VALUE value = lex_input(&input, checked);
input_unload_filepath(&input);
VALUE value = lex_source(&source, StringValueCStr(filepath));
source_file_unload(&source);
return value; return value;
} }
/******************************************************************************/
/* Parsing Ruby code */
/******************************************************************************/
// Parse the given input and return a ParseResult instance.
static VALUE static VALUE
parse_source(source_t *source, char *filepath) { parse_input(input_t *input, const char *filepath) {
yp_parser_t parser; yp_parser_t parser;
yp_parser_init(&parser, source->source, source->size, filepath); yp_parser_init(&parser, input->source, input->size, filepath);
yp_node_t *node = yp_parse(&parser); yp_node_t *node = yp_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding.name); rb_encoding *encoding = rb_enc_find(parser.encoding.name);
VALUE source = yp_source_new(&parser);
VALUE result_argv[] = { VALUE result_argv[] = {
yp_ast_new(&parser, node, encoding), yp_ast_new(&parser, node, encoding),
parser_comments(&parser), parser_comments(&parser, source),
parser_errors(&parser, encoding), parser_errors(&parser, encoding, source),
parser_warnings(&parser, encoding) parser_warnings(&parser, encoding, source)
}; };
VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult); VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
@ -353,40 +457,58 @@ parse_source(source_t *source, char *filepath) {
return result; return result;
} }
// Parse the given string and return a ParseResult instance.
static VALUE static VALUE
parse(VALUE self, VALUE string, VALUE filepath) { parse(int argc, VALUE *argv, VALUE self) {
source_t source; VALUE string;
source_string_load(&source, string); VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
input_t input;
input_load_string(&input, string);
#ifdef YARP_DEBUG_MODE_BUILD #ifdef YARP_DEBUG_MODE_BUILD
char* dup = malloc(source.size); char* dup = malloc(input.size);
memcpy(dup, source.source, source.size); memcpy(dup, input.source, input.size);
source.source = dup; input.source = dup;
#endif #endif
VALUE value = parse_source(&source, NIL_P(filepath) ? NULL : StringValueCStr(filepath));
VALUE value = parse_input(&input, check_filepath(filepath));
#ifdef YARP_DEBUG_MODE_BUILD #ifdef YARP_DEBUG_MODE_BUILD
free(dup); free(dup);
#endif #endif
return value; return value;
} }
// Parse the given file and return a ParseResult instance.
static VALUE static VALUE
parse_file(VALUE self, VALUE rb_filepath) { parse_file(VALUE self, VALUE filepath) {
source_t source; input_t input;
if (source_file_load(&source, rb_filepath) != 0) {
return Qnil; const char *checked = check_filepath(filepath);
} if (input_load_filepath(&input, checked) != 0) return Qnil;
VALUE value = parse_input(&input, checked);
input_unload_filepath(&input);
VALUE value = parse_source(&source, StringValueCStr(rb_filepath));
source_file_unload(&source);
return value; return value;
} }
/******************************************************************************/
/* Utility functions exposed to make testing easier */
/******************************************************************************/
// Returns an array of strings corresponding to the named capture groups in the
// given source string. If YARP was unable to parse the regular expression, this
// function returns nil.
static VALUE static VALUE
named_captures(VALUE self, VALUE rb_source) { named_captures(VALUE self, VALUE source) {
yp_string_list_t string_list; yp_string_list_t string_list;
yp_string_list_init(&string_list); yp_string_list_init(&string_list);
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(rb_source), RSTRING_LEN(rb_source), &string_list)) { if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) {
yp_string_list_free(&string_list); yp_string_list_free(&string_list);
return Qnil; return Qnil;
} }
@ -401,6 +523,8 @@ named_captures(VALUE self, VALUE rb_source) {
return names; return names;
} }
// Accepts a source string and a type of unescaping and returns the unescaped
// version.
static VALUE static VALUE
unescape(VALUE source, yp_unescape_type_t unescape_type) { unescape(VALUE source, yp_unescape_type_t unescape_type) {
yp_string_t string; yp_string_t string;
@ -409,7 +533,13 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
yp_list_t error_list; yp_list_t error_list;
yp_list_init(&error_list); yp_list_init(&error_list);
yp_unescape_manipulate_string(RSTRING_PTR(source), RSTRING_LEN(source), &string, unescape_type, &error_list); const char *start = RSTRING_PTR(source);
size_t length = RSTRING_LEN(source);
yp_parser_t parser;
yp_parser_init(&parser, start, length, "");
yp_unescape_manipulate_string(&parser, start, length, &string, unescape_type, &error_list);
if (yp_list_empty_p(&error_list)) { if (yp_list_empty_p(&error_list)) {
result = rb_str_new(yp_string_source(&string), yp_string_length(&string)); result = rb_str_new(yp_string_source(&string), yp_string_length(&string));
} else { } else {
@ -418,27 +548,32 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
yp_string_free(&string); yp_string_free(&string);
yp_list_free(&error_list); yp_list_free(&error_list);
yp_parser_free(&parser);
return result; return result;
} }
// Do not unescape anything in the given string. This is here to provide a
// consistent API.
static VALUE static VALUE
unescape_none(VALUE self, VALUE source) { unescape_none(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_NONE); return unescape(source, YP_UNESCAPE_NONE);
} }
// Minimally unescape the given string. This means effectively unescaping just
// the quotes of a string. Returns the unescaped string.
static VALUE static VALUE
unescape_minimal(VALUE self, VALUE source) { unescape_minimal(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_MINIMAL); return unescape(source, YP_UNESCAPE_MINIMAL);
} }
// Unescape everything in the given string. Return the unescaped string.
static VALUE static VALUE
unescape_all(VALUE self, VALUE source) { unescape_all(VALUE self, VALUE source) {
return unescape(source, YP_UNESCAPE_ALL); return unescape(source, YP_UNESCAPE_ALL);
} }
// This function returns a hash of information about the given source string's // Return a hash of information about the given source string's memory usage.
// memory usage.
static VALUE static VALUE
memsize(VALUE self, VALUE string) { memsize(VALUE self, VALUE string) {
yp_parser_t parser; yp_parser_t parser;
@ -459,28 +594,17 @@ memsize(VALUE self, VALUE string) {
return result; return result;
} }
static VALUE // Parse the file, but do nothing with the result. This is used to profile the
compile(VALUE self, VALUE string) { // parser for memory and speed.
yp_parser_t parser;
size_t length = RSTRING_LEN(string);
yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
yp_node_t *node = yp_parse(&parser);
VALUE result = yp_compile(node);
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
return result;
}
static VALUE static VALUE
profile_file(VALUE self, VALUE filepath) { profile_file(VALUE self, VALUE filepath) {
source_t source; input_t input;
if (source_file_load(&source, filepath) != 0) return Qnil;
const char *checked = check_filepath(filepath);
if (input_load_filepath(&input, checked) != 0) return Qnil;
yp_parser_t parser; yp_parser_t parser;
yp_parser_init(&parser, source.source, source.size, StringValueCStr(filepath)); yp_parser_init(&parser, input.source, input.size, checked);
yp_node_t *node = yp_parse(&parser); yp_node_t *node = yp_parse(&parser);
yp_node_destroy(&parser, node); yp_node_destroy(&parser, node);
@ -491,9 +615,8 @@ profile_file(VALUE self, VALUE filepath) {
// The function takes a source string and returns a Ruby array containing the // The function takes a source string and returns a Ruby array containing the
// offsets of every newline in the string. (It also includes a 0 at the // offsets of every newline in the string. (It also includes a 0 at the
// beginning to indicate the position of the first line.) // beginning to indicate the position of the first line.) It accepts a string as
// // its only argument and returns an array of integers.
// It accepts a string as its only argument and returns an array of integers.
static VALUE static VALUE
newlines(VALUE self, VALUE string) { newlines(VALUE self, VALUE string) {
yp_parser_t parser; yp_parser_t parser;
@ -512,46 +635,56 @@ newlines(VALUE self, VALUE string) {
return result; return result;
} }
/******************************************************************************/
/* Initialization of the extension */
/******************************************************************************/
RUBY_FUNC_EXPORTED void RUBY_FUNC_EXPORTED void
Init_yarp(void) { Init_yarp(void) {
// Make sure that the YARP library version matches the expected version.
// Otherwise something was compiled incorrectly.
if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) { if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) {
rb_raise(rb_eRuntimeError, "The YARP library version (%s) does not match the expected version (%s)", yp_version(), rb_raise(
EXPECTED_YARP_VERSION); rb_eRuntimeError,
"The YARP library version (%s) does not match the expected version (%s)",
yp_version(),
EXPECTED_YARP_VERSION
);
} }
// Grab up references to all of the constants that we're going to need to
// reference throughout this extension.
rb_cYARP = rb_define_module("YARP"); rb_cYARP = rb_define_module("YARP");
rb_cYARPSource = rb_define_class_under(rb_cYARP, "Source", rb_cObject);
rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject); rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject);
rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject); rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject);
rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject); rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject);
rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject); rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject);
rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject); rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject);
rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject); rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject);
rb_define_const(rb_cYARP, "VERSION", rb_sprintf("%d.%d.%d", YP_VERSION_MAJOR, YP_VERSION_MINOR, YP_VERSION_PATCH)); // Define the version string here so that we can use the constants defined
// in yarp.h.
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
rb_define_singleton_method(rb_cYARP, "dump", dump, 2); // First, the functions that have to do with lexing and parsing.
rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1); rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
rb_define_singleton_method(rb_cYARP, "lex", lex, -1);
rb_define_singleton_method(rb_cYARP, "lex", lex, 2);
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1); rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
rb_define_singleton_method(rb_cYARP, "_parse", parse, 2);
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1); rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
// Next, the functions that will be called by the parser to perform various
// internal tasks. We expose these to make them easier to test.
rb_define_singleton_method(rb_cYARP, "named_captures", named_captures, 1); rb_define_singleton_method(rb_cYARP, "named_captures", named_captures, 1);
rb_define_singleton_method(rb_cYARP, "unescape_none", unescape_none, 1); rb_define_singleton_method(rb_cYARP, "unescape_none", unescape_none, 1);
rb_define_singleton_method(rb_cYARP, "unescape_minimal", unescape_minimal, 1); rb_define_singleton_method(rb_cYARP, "unescape_minimal", unescape_minimal, 1);
rb_define_singleton_method(rb_cYARP, "unescape_all", unescape_all, 1); rb_define_singleton_method(rb_cYARP, "unescape_all", unescape_all, 1);
rb_define_singleton_method(rb_cYARP, "memsize", memsize, 1); rb_define_singleton_method(rb_cYARP, "memsize", memsize, 1);
rb_define_singleton_method(rb_cYARP, "compile", compile, 1);
rb_define_singleton_method(rb_cYARP, "profile_file", profile_file, 1); rb_define_singleton_method(rb_cYARP, "profile_file", profile_file, 1);
rb_define_singleton_method(rb_cYARP, "newlines", newlines, 1); rb_define_singleton_method(rb_cYARP, "newlines", newlines, 1);
// Next, initialize the pack API.
Init_yarp_pack(); Init_yarp_pack();
} }

View File

@ -5,11 +5,11 @@
#include <ruby/encoding.h> #include <ruby/encoding.h>
#include "yarp.h" #include "yarp.h"
#include <fcntl.h> // The following headers are necessary to read files using demand paging.
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include <windows.h>
#else #else
#include <fcntl.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <unistd.h> #include <unistd.h>
@ -17,16 +17,11 @@
#define EXPECTED_YARP_VERSION "0.4.0" #define EXPECTED_YARP_VERSION "0.4.0"
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding); VALUE yp_source_new(yp_parser_t *parser);
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding); VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
VALUE yp_compile(yp_node_t *node);
void Init_yarp_pack(void); void Init_yarp_pack(void);
YP_EXPORTED_FUNCTION void Init_yarp(void); YP_EXPORTED_FUNCTION void Init_yarp(void);
#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v)) #endif
#endif // YARP_EXT_NODE_H

View File

@ -1,20 +0,0 @@
#ifndef YARP_MISSING_H
#define YARP_MISSING_H
#include "yarp/defines.h"
#include <ctype.h>
#include <stddef.h>
#include <string.h>
const char * yp_strnstr(const char *haystack, const char *needle, size_t length);
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
#ifndef HAVE_STRNCASECMP
#ifndef strncasecmp
#define strncasecmp yp_strncasecmp
#endif
#endif
#endif

View File

@ -2,8 +2,6 @@
#define YARP_NODE_H #define YARP_NODE_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include "yarp.h"
#include "yarp/parser.h" #include "yarp/parser.h"
// Append a token to the given list. // Append a token to the given list.
@ -15,6 +13,20 @@ void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
// Clear the node but preserves the location. // Clear the node but preserves the location.
void yp_node_clear(yp_node_t *node); void yp_node_clear(yp_node_t *node);
// Deallocate a node and all of its children.
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
// This struct stores the information gathered by the yp_node_memsize function.
// It contains both the memory footprint and additionally metadata about the
// shape of the tree.
typedef struct {
size_t memsize;
size_t node_count;
} yp_memsize_t;
// Calculates the memory footprint of a given node.
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
#define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 }) #define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
#define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 }) #define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })

View File

@ -3,8 +3,8 @@
#include "yarp/defines.h" #include "yarp/defines.h"
#include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h>
typedef enum yp_pack_version { typedef enum yp_pack_version {
YP_PACK_VERSION_3_2_0 YP_PACK_VERSION_3_2_0

View File

@ -1,17 +1,16 @@
#ifndef YARP_PARSER_H #ifndef YARP_PARSER_H
#define YARP_PARSER_H #define YARP_PARSER_H
#include "yarp/defines.h"
#include <stdbool.h>
#include "yarp/ast.h" #include "yarp/ast.h"
#include "yarp/defines.h"
#include "yarp/enc/yp_encoding.h" #include "yarp/enc/yp_encoding.h"
#include "yarp/util/yp_constant_pool.h" #include "yarp/util/yp_constant_pool.h"
#include "yarp/util/yp_list.h" #include "yarp/util/yp_list.h"
#include "yarp/util/yp_newline_list.h" #include "yarp/util/yp_newline_list.h"
#include "yarp/util/yp_state_stack.h" #include "yarp/util/yp_state_stack.h"
#include <stdbool.h>
// This enum provides various bits that represent different kinds of states that // This enum provides various bits that represent different kinds of states that
// the lexer can track. This is used to determine which kind of token to return // the lexer can track. This is used to determine which kind of token to return
// based on the context of the parser. // based on the context of the parser.

View File

@ -5,6 +5,8 @@
/* if you are looking to modify the */ /* if you are looking to modify the */
/* template */ /* template */
/******************************************************************************/ /******************************************************************************/
#include "yarp/defines.h"
#include <stdio.h> #include <stdio.h>
#include "yarp/ast.h" #include "yarp/ast.h"
@ -14,7 +16,7 @@
static void static void
prettyprint_location(yp_buffer_t *buffer, yp_parser_t *parser, yp_location_t *location) { prettyprint_location(yp_buffer_t *buffer, yp_parser_t *parser, yp_location_t *location) {
char printed[] = "[0000-0000]"; char printed[] = "[0000-0000]";
sprintf(printed, "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start)); yp_snprintf(printed, sizeof(printed), "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
yp_buffer_append_str(buffer, printed, strlen(printed)); yp_buffer_append_str(buffer, printed, strlen(printed));
} }
@ -189,7 +191,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_block_node_t *)node)->locals.size; index++) { for (uint32_t index = 0; index < ((yp_block_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2); if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12]; char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_block_node_t *)node)->locals.ids[index]); yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_block_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer)); yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
} }
yp_buffer_append_str(buffer, ", ", 2); if (((yp_block_node_t *)node)->parameters == NULL) { yp_buffer_append_str(buffer, ", ", 2); if (((yp_block_node_t *)node)->parameters == NULL) {
@ -291,7 +293,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_node_t *)node)->block); prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_node_t *)node)->block);
} }
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
sprintf(flags_buffer, "+%d", ((yp_call_node_t *)node)->flags); yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_call_node_t *)node)->flags);
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer)); yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
yp_buffer_append_str(buffer, ", ", 2); yp_buffer_append_str(buffer, "\"", 1); yp_buffer_append_str(buffer, ", ", 2); yp_buffer_append_str(buffer, "\"", 1);
yp_buffer_append_str(buffer, yp_string_source(&((yp_call_node_t *)node)->name), yp_string_length(&((yp_call_node_t *)node)->name)); yp_buffer_append_str(buffer, yp_string_source(&((yp_call_node_t *)node)->name), yp_string_length(&((yp_call_node_t *)node)->name));
@ -321,7 +323,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_call_operator_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_call_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_operator_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_id_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char operator_id_buffer[12];
sprintf(operator_id_buffer, "%u", ((yp_call_operator_write_node_t *)node)->operator_id); yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer)); yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -360,7 +362,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_class_node_t *)node)->locals.size; index++) { for (uint32_t index = 0; index < ((yp_class_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2); if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12]; char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_class_node_t *)node)->locals.ids[index]); yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_class_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer)); yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
} }
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_class_node_t *)node)->class_keyword_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_class_node_t *)node)->class_keyword_loc);
@ -406,7 +408,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_class_variable_operator_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_class_variable_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_class_variable_operator_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_class_variable_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_class_variable_operator_write_node_t *)node)->operator); yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer)); yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -454,7 +456,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_constant_operator_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_constant_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_operator_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_constant_operator_write_node_t *)node)->operator); yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer)); yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -493,7 +495,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_constant_path_operator_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_constant_path_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_path_operator_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_path_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_constant_path_operator_write_node_t *)node)->operator); yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer)); yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -540,7 +542,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); for (uint32_t index = 0; index < ((yp_def_node_t *)node)->locals.size; index++) { yp_buffer_append_str(buffer, ", ", 2); for (uint32_t index = 0; index < ((yp_def_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2); if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12]; char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_def_node_t *)node)->locals.ids[index]); yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_def_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer)); yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
} }
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_def_node_t *)node)->def_keyword_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_def_node_t *)node)->def_keyword_loc);
@ -734,7 +736,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_global_variable_operator_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_global_variable_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_global_variable_operator_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_global_variable_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_global_variable_operator_write_node_t *)node)->operator); yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer)); yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -871,7 +873,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_instance_variable_operator_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_instance_variable_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_instance_variable_operator_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_instance_variable_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
sprintf(operator_buffer, "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator); yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer)); yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -911,7 +913,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
} }
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_interpolated_regular_expression_node_t *)node)->closing_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_interpolated_regular_expression_node_t *)node)->closing_loc);
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
sprintf(flags_buffer, "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags); yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer)); yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -1001,7 +1003,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_lambda_node_t *)node)->locals.size; index++) { for (uint32_t index = 0; index < ((yp_lambda_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2); if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12]; char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_lambda_node_t *)node)->locals.ids[index]); yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer)); yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
} }
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_lambda_node_t *)node)->opening_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_lambda_node_t *)node)->opening_loc);
@ -1024,7 +1026,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_and_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_and_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_and_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_and_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id); yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer)); yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -1035,7 +1037,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_or_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_or_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_or_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_or_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id); yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer)); yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -1046,10 +1048,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_write_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_write_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_write_node_t *)node)->value); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_write_node_t *)node)->value);
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id); yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer)); yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ", ", 2); char operator_id_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char operator_id_buffer[12];
sprintf(operator_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id); yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer)); yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -1057,10 +1059,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
case YP_NODE_LOCAL_VARIABLE_READ_NODE: { case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
yp_buffer_append_str(buffer, "LocalVariableReadNode(", 22); yp_buffer_append_str(buffer, "LocalVariableReadNode(", 22);
char constant_id_buffer[12]; char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_read_node_t *)node)->constant_id); yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer)); yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ", ", 2); char depth_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char depth_buffer[12];
sprintf(depth_buffer, "+%d", ((yp_local_variable_read_node_t *)node)->depth); yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_read_node_t *)node)->depth);
yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer)); yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -1068,10 +1070,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: { case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
yp_buffer_append_str(buffer, "LocalVariableWriteNode(", 23); yp_buffer_append_str(buffer, "LocalVariableWriteNode(", 23);
char constant_id_buffer[12]; char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_local_variable_write_node_t *)node)->constant_id); yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer)); yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ", ", 2); char depth_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char depth_buffer[12];
sprintf(depth_buffer, "+%d", ((yp_local_variable_write_node_t *)node)->depth); yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_write_node_t *)node)->depth);
yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer)); yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
yp_buffer_append_str(buffer, ", ", 2); if (((yp_local_variable_write_node_t *)node)->value == NULL) { yp_buffer_append_str(buffer, ", ", 2); if (((yp_local_variable_write_node_t *)node)->value == NULL) {
yp_buffer_append_str(buffer, "nil", 3); yp_buffer_append_str(buffer, "nil", 3);
@ -1113,7 +1115,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_module_node_t *)node)->locals.size; index++) { for (uint32_t index = 0; index < ((yp_module_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2); if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12]; char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_module_node_t *)node)->locals.ids[index]); yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_module_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer)); yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
} }
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_module_node_t *)node)->module_keyword_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_module_node_t *)node)->module_keyword_loc);
@ -1187,7 +1189,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
case YP_NODE_OPTIONAL_PARAMETER_NODE: { case YP_NODE_OPTIONAL_PARAMETER_NODE: {
yp_buffer_append_str(buffer, "OptionalParameterNode(", 22); yp_buffer_append_str(buffer, "OptionalParameterNode(", 22);
char constant_id_buffer[12]; char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_optional_parameter_node_t *)node)->constant_id); yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer)); yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->name_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->name_loc);
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->operator_loc);
@ -1298,7 +1300,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_program_node_t *)node)->locals.size; index++) { for (uint32_t index = 0; index < ((yp_program_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2); if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12]; char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_program_node_t *)node)->locals.ids[index]); yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_program_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer)); yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
} }
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_program_node_t *)node)->statements); yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_program_node_t *)node)->statements);
@ -1319,7 +1321,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
} }
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_range_node_t *)node)->operator_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_range_node_t *)node)->operator_loc);
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
sprintf(flags_buffer, "+%d", ((yp_range_node_t *)node)->flags); yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_range_node_t *)node)->flags);
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer)); yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -1344,7 +1346,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
yp_buffer_append_str(buffer, yp_string_source(&((yp_regular_expression_node_t *)node)->unescaped), yp_string_length(&((yp_regular_expression_node_t *)node)->unescaped)); yp_buffer_append_str(buffer, yp_string_source(&((yp_regular_expression_node_t *)node)->unescaped), yp_string_length(&((yp_regular_expression_node_t *)node)->unescaped));
yp_buffer_append_str(buffer, "\"", 1); yp_buffer_append_str(buffer, "\"", 1);
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12]; yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
sprintf(flags_buffer, "+%d", ((yp_regular_expression_node_t *)node)->flags); yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_regular_expression_node_t *)node)->flags);
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer)); yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -1363,7 +1365,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
case YP_NODE_REQUIRED_PARAMETER_NODE: { case YP_NODE_REQUIRED_PARAMETER_NODE: {
yp_buffer_append_str(buffer, "RequiredParameterNode(", 22); yp_buffer_append_str(buffer, "RequiredParameterNode(", 22);
char constant_id_buffer[12]; char constant_id_buffer[12];
sprintf(constant_id_buffer, "%u", ((yp_required_parameter_node_t *)node)->constant_id); yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_required_parameter_node_t *)node)->constant_id);
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer)); yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
yp_buffer_append_str(buffer, ")", 1); yp_buffer_append_str(buffer, ")", 1);
break; break;
@ -1443,7 +1445,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
for (uint32_t index = 0; index < ((yp_singleton_class_node_t *)node)->locals.size; index++) { for (uint32_t index = 0; index < ((yp_singleton_class_node_t *)node)->locals.size; index++) {
if (index != 0) yp_buffer_append_str(buffer, ", ", 2); if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
char locals_buffer[12]; char locals_buffer[12];
sprintf(locals_buffer, "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]); yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer)); yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
} }
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_singleton_class_node_t *)node)->class_keyword_loc); yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_singleton_class_node_t *)node)->class_keyword_loc);

View File

@ -374,7 +374,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
case '#': { // inline comments case '#': { // inline comments
bool found = yp_regexp_char_find(parser, ')'); bool found = yp_regexp_char_find(parser, ')');
// the close paren we found is escaped, we need to find another // the close paren we found is escaped, we need to find another
while (parser->start <= parser->cursor - 2 && *(parser->cursor - 2) == '\\') { while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
found = yp_regexp_char_find(parser, ')'); found = yp_regexp_char_find(parser, ')');
} }
return found; return found;

View File

@ -2,15 +2,14 @@
#define YARP_REGEXP_H #define YARP_REGEXP_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include "yarp/parser.h" #include "yarp/parser.h"
#include "yarp/util/yp_string_list.h"
#include "yarp/util/yp_string.h"
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <string.h> #include <string.h>
#include "yarp/util/yp_string_list.h"
#include "yarp/util/yp_string.h"
// Parse a regular expression and extract the names of all of the named capture // Parse a regular expression and extract the names of all of the named capture
// groups. // groups.
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures); YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures);

View File

@ -438,14 +438,14 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
// \c? or \C-? delete, ASCII 7Fh (DEL) // \c? or \C-? delete, ASCII 7Fh (DEL)
// //
YP_EXPORTED_FUNCTION void YP_EXPORTED_FUNCTION void
yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) { yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
if (unescape_type == YP_UNESCAPE_NONE) { if (unescape_type == YP_UNESCAPE_NONE) {
// If we're not unescaping then we can reference the source directly. // If we're not unescaping then we can reference the source directly.
yp_string_shared_init(string, value, value + length); yp_string_shared_init(string, value, value + length);
return; return;
} }
const char *backslash = memchr(value, '\\', length); const char *backslash = yp_memchr(parser, value, '\\', length);
if (backslash == NULL) { if (backslash == NULL) {
// Here there are no escapes, so we can reference the source directly. // Here there are no escapes, so we can reference the source directly.
@ -509,7 +509,7 @@ yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *str
} }
if (end > cursor) { if (end > cursor) {
backslash = memchr(cursor, '\\', (size_t) (end - cursor)); backslash = yp_memchr(parser, cursor, '\\', (size_t) (end - cursor));
} else { } else {
backslash = NULL; backslash = NULL;
} }

View File

@ -2,17 +2,18 @@
#define YARP_UNESCAPE_H #define YARP_UNESCAPE_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include "yarp/diagnostic.h"
#include "yarp/parser.h"
#include "yarp/util/yp_char.h"
#include "yarp/util/yp_list.h"
#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_string.h"
#include <assert.h> #include <assert.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include "yarp/diagnostic.h"
#include "yarp/util/yp_char.h"
#include "yarp/util/yp_list.h"
#include "yarp/util/yp_string.h"
// The type of unescape we are performing. // The type of unescape we are performing.
typedef enum { typedef enum {
// When we're creating a string inside of a list literal like %w, we // When we're creating a string inside of a list literal like %w, we
@ -30,7 +31,7 @@ typedef enum {
// Unescape the contents of the given token into the given string using the // Unescape the contents of the given token into the given string using the
// given unescape mode. // given unescape mode.
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list); YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
YP_EXPORTED_FUNCTION size_t yp_unescape_calculate_difference(const char *value, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list); YP_EXPORTED_FUNCTION size_t yp_unescape_calculate_difference(const char *value, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list);

View File

@ -2,12 +2,11 @@
#define YP_CHAR_H #define YP_CHAR_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include "yarp/util/yp_newline_list.h"
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include "yarp/util/yp_newline_list.h"
// Returns the number of characters at the start of the string that are // Returns the number of characters at the start of the string that are
// whitespace. Disallows searching past the given maximum number of characters. // whitespace. Disallows searching past the given maximum number of characters.
size_t yp_strspn_whitespace(const char *string, ptrdiff_t length); size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);

View File

@ -6,13 +6,13 @@
#ifndef YP_CONSTANT_POOL_H #ifndef YP_CONSTANT_POOL_H
#define YP_CONSTANT_POOL_H #define YP_CONSTANT_POOL_H
#include "yarp/defines.h"
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "yarp/defines.h"
typedef uint32_t yp_constant_id_t; typedef uint32_t yp_constant_id_t;
typedef struct { typedef struct {

31
yarp/util/yp_memchr.c Normal file
View File

@ -0,0 +1,31 @@
#include "yarp/util/yp_memchr.h"
#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
// We need to roll our own memchr to handle cases where the encoding changes and
// we need to search for a character in a buffer that could be the trailing byte
// of a multibyte character.
//
// Searches the first `number` bytes of `memory` for `character` and returns a
// pointer to the first match, or NULL if there is none. When the parser has
// switched to a multibyte encoding and the character is large enough to be a
// trailing byte, the buffer is walked one encoded character at a time so that
// only the leading byte of each character is compared. In every other case
// this simply forwards to the standard memchr.
void *
yp_memchr(yp_parser_t *parser, const void *memory, int character, size_t number) {
    if (parser->encoding_changed && parser->encoding.multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
        const char *source = (const char *) memory;

        // memchr compares bytes as unsigned char. Do the same here so that the
        // result does not depend on whether plain char is signed: with a
        // signed char, a byte of 0x80 or above would otherwise never compare
        // equal to the (positive) int that was passed in.
        const unsigned char target = (unsigned char) character;
        size_t index = 0;

        while (index < number) {
            if ((unsigned char) source[index] == target) {
                return (void *) (source + index);
            }

            // A width of 0 means the encoding could not determine the width
            // of the character at this position, so we give up searching.
            size_t width = parser->encoding.char_width(source + index);
            if (width == 0) {
                return NULL;
            }

            index += width;
        }

        return NULL;
    } else {
        return memchr(memory, character, number);
    }
}

14
yarp/util/yp_memchr.h Normal file
View File

@ -0,0 +1,14 @@
#ifndef YP_MEMCHR_H
#define YP_MEMCHR_H
#include "yarp/defines.h"
#include "yarp/parser.h"
#include <stddef.h>
// We need to roll our own memchr to handle cases where the encoding changes and
// we need to search for a character in a buffer that could be the trailing byte
// of a multibyte character.
//
// Looks for character within the first number bytes of source and returns a
// pointer to the first match, or NULL if it is not found. When the parser's
// encoding has been changed to a multibyte encoding and character is 0x40 or
// above, the search advances one encoded character at a time instead of one
// byte at a time; otherwise it behaves like the standard memchr.
void * yp_memchr(yp_parser_t *parser, const void *source, int character, size_t number);
#endif

View File

@ -31,7 +31,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
} }
assert(cursor >= list->start); assert(cursor >= list->start);
list->offsets[list->size++] = (size_t) (cursor - list->start); list->offsets[list->size++] = (size_t) (cursor - list->start + 1);
return true; return true;
} }

View File

@ -9,13 +9,13 @@
#ifndef YP_NEWLINE_LIST_H #ifndef YP_NEWLINE_LIST_H
#define YP_NEWLINE_LIST_H #define YP_NEWLINE_LIST_H
#include <assert.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include "yarp/defines.h" #include "yarp/defines.h"
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
// A list of offsets of newlines in a string. The offsets are assumed to be // A list of offsets of newlines in a string. The offsets are assumed to be
// sorted/inserted in ascending order. // sorted/inserted in ascending order.
typedef struct { typedef struct {

14
yarp/util/yp_snprintf.c Normal file
View File

@ -0,0 +1,14 @@
#include "yarp/defines.h"
#ifndef HAVE_SNPRINTF
// Fallback used when the system does not provide snprintf. The size argument
// is accepted so that the signature matches snprintf, but it is not enforced:
// formatting is delegated to the less-safe vsprintf, so dest must already be
// large enough to hold the formatted output. Returns whatever vsprintf returns.
int
yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...) {
    va_list arguments;
    int written;

    va_start(arguments, format);
    written = vsprintf(dest, format, arguments);
    va_end(arguments);

    return written;
}
#endif

View File

@ -1,11 +1,5 @@
#include "yarp/util/yp_string.h" #include "yarp/util/yp_string.h"
// Allocate a new yp_string_t.
yp_string_t *
yp_string_alloc(void) {
return (yp_string_t *) malloc(sizeof(yp_string_t));
}
// Initialize a shared string that is based on initial input. // Initialize a shared string that is based on initial input.
void void
yp_string_shared_init(yp_string_t *string, const char *start, const char *end) { yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {

View File

@ -29,9 +29,6 @@ typedef struct {
} as; } as;
} yp_string_t; } yp_string_t;
// Allocate a new yp_string_t.
yp_string_t * yp_string_alloc(void);
// Initialize a shared string that is based on initial input. // Initialize a shared string that is based on initial input.
void yp_string_shared_init(yp_string_t *string, const char *start, const char *end); void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);

View File

@ -2,12 +2,11 @@
#define YARP_STRING_LIST_H #define YARP_STRING_LIST_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include "yarp/util/yp_string.h"
#include <stddef.h> #include <stddef.h>
#include <stdlib.h> #include <stdlib.h>
#include "yarp/util/yp_string.h"
typedef struct { typedef struct {
yp_string_t *strings; yp_string_t *strings;
size_t length; size_t length;

View File

@ -1,19 +1,5 @@
#include "yarp/missing.h" #include <ctype.h>
#include <stddef.h>
const char *
yp_strnstr(const char *haystack, const char *needle, size_t length) {
size_t needle_length = strlen(needle);
if (needle_length > length) return NULL;
const char *haystack_limit = haystack + length - needle_length + 1;
while ((haystack = memchr(haystack, needle[0], (size_t) (haystack_limit - haystack))) != NULL) {
if (!strncmp(haystack, needle, needle_length)) return haystack;
haystack++;
}
return NULL;
}
int int
yp_strncasecmp(const char *string1, const char *string2, size_t length) { yp_strncasecmp(const char *string1, const char *string2, size_t length) {

View File

@ -1,5 +1,42 @@
#include "yarp/util/yp_strpbrk.h" #include "yarp/util/yp_strpbrk.h"
// Encoding-aware slow path: scan at most maximum bytes of source, stepping one
// encoded character at a time, and return a pointer to the first character
// whose leading byte appears in charset. Returns NULL when nothing matches or
// when the encoding reports a width of 0 for the character at the cursor.
static inline const char *
yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
    for (size_t offset = 0; offset < maximum;) {
        const char *cursor = source + offset;

        if (strchr(charset, *cursor) != NULL) {
            return cursor;
        }

        // Advance by the full width of the current character so that its
        // trailing bytes are never compared against the charset.
        size_t width = parser->encoding.char_width(cursor);
        if (width == 0) break;

        offset += width;
    }

    return NULL;
}
// Encoding-agnostic fast path: examine each of the first maximum bytes of
// source in turn and return a pointer to the first one that strchr finds in
// charset, or NULL if none of them do.
static inline const char *
yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
    for (size_t offset = 0; offset < maximum; offset++) {
        const char *cursor = source + offset;

        if (strchr(charset, *cursor) != NULL) {
            return cursor;
        }
    }

    return NULL;
}
// Here we have rolled our own version of strpbrk. The standard library strpbrk // Here we have rolled our own version of strpbrk. The standard library strpbrk
// has undefined behavior when the source string is not null-terminated. We want // has undefined behavior when the source string is not null-terminated. We want
// to support strings that are not null-terminated because yp_parse does not // to support strings that are not null-terminated because yp_parse does not
@ -12,19 +49,18 @@
// also don't want it to stop on null bytes. Ruby actually allows null bytes // also don't want it to stop on null bytes. Ruby actually allows null bytes
// within strings, comments, regular expressions, etc. So we need to be able to // within strings, comments, regular expressions, etc. So we need to be able to
// skip past them. // skip past them.
//
// Finally, we want to support encodings wherein the charset could contain
// characters that are trailing bytes of multi-byte characters. For example, in
// Shift-JIS, the backslash character can be a trailing byte. In that case we
// need to take a slower path and iterate one multi-byte character at a time.
const char * const char *
yp_strpbrk(const char *source, const char *charset, ptrdiff_t length) { yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
if (length < 0) return NULL; if (length <= 0) {
return NULL;
size_t index = 0; } else if (parser->encoding_changed && parser->encoding.multibyte) {
size_t maximum = (size_t) length; return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
} else {
while (index < maximum) { return yp_strpbrk_single_byte(source, charset, (size_t) length);
if (strchr(charset, source[index]) != NULL) {
return &source[index];
}
index++;
} }
return NULL;
} }

View File

@ -2,6 +2,7 @@
#define YP_STRPBRK_H #define YP_STRPBRK_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include "yarp/parser.h"
#include <stddef.h> #include <stddef.h>
#include <string.h> #include <string.h>
@ -18,6 +19,11 @@
// also don't want it to stop on null bytes. Ruby actually allows null bytes // also don't want it to stop on null bytes. Ruby actually allows null bytes
// within strings, comments, regular expressions, etc. So we need to be able to // within strings, comments, regular expressions, etc. So we need to be able to
// skip past them. // skip past them.
const char * yp_strpbrk(const char *source, const char *charset, ptrdiff_t length); //
// Finally, we want to support encodings wherein the charset could contain
// characters that are trailing bytes of multi-byte characters. For example, in
// Shift-JIS, the backslash character can be a trailing byte. In that case we
// need to take a slower path and iterate one multi-byte character at a time.
const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);
#endif #endif

5
yarp/version.h Normal file
View File

@ -0,0 +1,5 @@
#define YP_VERSION_MAJOR 0
#define YP_VERSION_MINOR 4
#define YP_VERSION_PATCH 0
#define YP_VERSION "0.4.0"

View File

@ -1,16 +1,19 @@
#include "yarp.h" #include "yarp.h"
#include "yarp/version.h"
#define YP_STRINGIZE0(expr) #expr // The YARP version and the serialization format.
#define YP_STRINGIZE(expr) YP_STRINGIZE0(expr)
#define YP_VERSION_MACRO YP_STRINGIZE(YP_VERSION_MAJOR) "." YP_STRINGIZE(YP_VERSION_MINOR) "." YP_STRINGIZE(YP_VERSION_PATCH)
#define YP_TAB_WHITESPACE_SIZE 8
const char * const char *
yp_version(void) { yp_version(void) {
return YP_VERSION_MACRO; return YP_VERSION;
} }
// In heredocs, tabs automatically complete up to the next 8 spaces. This is
// defined in CRuby as TAB_WIDTH.
#define YP_TAB_WHITESPACE_SIZE 8
// Debugging logging will provide you with additional debugging functions as
// well as automatically replace some functions with their debugging
// counterparts.
#ifndef YP_DEBUG_LOGGING #ifndef YP_DEBUG_LOGGING
#define YP_DEBUG_LOGGING 0 #define YP_DEBUG_LOGGING 0
#endif #endif
@ -442,6 +445,7 @@ not_provided(yp_parser_t *parser) {
return (yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start }; return (yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
} }
#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_SHARED, .as.shared.start = NULL, .as.shared.end = NULL })
#define YP_LOCATION_NULL_VALUE(parser) ((yp_location_t) { .start = parser->start, .end = parser->start }) #define YP_LOCATION_NULL_VALUE(parser) ((yp_location_t) { .start = parser->start, .end = parser->start })
#define YP_LOCATION_TOKEN_VALUE(token) ((yp_location_t) { .start = (token)->start, .end = (token)->end }) #define YP_LOCATION_TOKEN_VALUE(token) ((yp_location_t) { .start = (token)->start, .end = (token)->end })
#define YP_LOCATION_NODE_VALUE(node) ((yp_location_t) { .start = (node)->location.start, .end = (node)->location.end }) #define YP_LOCATION_NODE_VALUE(node) ((yp_location_t) { .start = (node)->location.start, .end = (node)->location.end })
@ -675,7 +679,9 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
.constant = NULL, .constant = NULL,
.rest = NULL, .rest = NULL,
.requireds = YP_EMPTY_NODE_LIST, .requireds = YP_EMPTY_NODE_LIST,
.posts = YP_EMPTY_NODE_LIST .posts = YP_EMPTY_NODE_LIST,
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
}; };
// For now we're going to just copy over each pointer manually. This could be // For now we're going to just copy over each pointer manually. This could be
@ -684,7 +690,7 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
for (size_t index = 0; index < nodes->size; index++) { for (size_t index = 0; index < nodes->size; index++) {
yp_node_t *child = nodes->nodes[index]; yp_node_t *child = nodes->nodes[index];
if (child->type == YP_NODE_SPLAT_NODE) { if (!found_rest && child->type == YP_NODE_SPLAT_NODE) {
node->rest = child; node->rest = child;
found_rest = true; found_rest = true;
} else if (found_rest) { } else if (found_rest) {
@ -710,7 +716,9 @@ yp_array_pattern_node_rest_create(yp_parser_t *parser, yp_node_t *rest) {
.constant = NULL, .constant = NULL,
.rest = rest, .rest = rest,
.requireds = YP_EMPTY_NODE_LIST, .requireds = YP_EMPTY_NODE_LIST,
.posts = YP_EMPTY_NODE_LIST .posts = YP_EMPTY_NODE_LIST,
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
}; };
return node; return node;
@ -1885,7 +1893,9 @@ yp_find_pattern_node_create(yp_parser_t *parser, yp_node_list_t *nodes) {
.constant = NULL, .constant = NULL,
.left = left, .left = left,
.right = right, .right = right,
.requireds = YP_EMPTY_NODE_LIST .requireds = YP_EMPTY_NODE_LIST,
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
}; };
// For now we're going to just copy over each pointer manually. This could be // For now we're going to just copy over each pointer manually. This could be
@ -2018,7 +2028,9 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
}, },
.constant = NULL, .constant = NULL,
.kwrest = NULL, .kwrest = NULL,
.assocs = YP_EMPTY_NODE_LIST .assocs = YP_EMPTY_NODE_LIST,
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
}; };
for (size_t index = 0; index < assocs->size; index++) { for (size_t index = 0; index < assocs->size; index++) {
@ -3709,7 +3721,8 @@ yp_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
}, },
.opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.content_loc = YP_LOCATION_TOKEN_VALUE(content), .content_loc = YP_LOCATION_TOKEN_VALUE(content),
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing) .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.unescaped = YP_EMPTY_STRING
}; };
return node; return node;
@ -3766,7 +3779,8 @@ yp_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
}, },
.opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
.value_loc = YP_LOCATION_TOKEN_VALUE(value), .value_loc = YP_LOCATION_TOKEN_VALUE(value),
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing) .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
.unescaped = YP_EMPTY_STRING
}; };
return node; return node;
@ -3788,7 +3802,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
ptrdiff_t length = label.end - label.start; ptrdiff_t length = label.end - label.start;
assert(length >= 0); assert(length >= 0);
yp_unescape_manipulate_string(label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list); yp_unescape_manipulate_string(parser, label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
break; break;
} }
case YP_TOKEN_MISSING: { case YP_TOKEN_MISSING: {
@ -4073,7 +4087,8 @@ yp_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_
}, },
.opening_loc = YP_LOCATION_TOKEN_VALUE(opening), .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
.content_loc = YP_LOCATION_TOKEN_VALUE(content), .content_loc = YP_LOCATION_TOKEN_VALUE(content),
.closing_loc = YP_LOCATION_TOKEN_VALUE(closing) .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
.unescaped = YP_EMPTY_STRING
}; };
return node; return node;
@ -4113,6 +4128,7 @@ yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_lo
} }
#undef YP_EMPTY_STRING
#undef YP_LOCATION_NULL_VALUE #undef YP_LOCATION_NULL_VALUE
#undef YP_LOCATION_TOKEN_VALUE #undef YP_LOCATION_TOKEN_VALUE
#undef YP_LOCATION_NODE_VALUE #undef YP_LOCATION_NODE_VALUE
@ -4331,6 +4347,17 @@ peek(yp_parser_t *parser) {
} }
} }
// Get the next string of length len in the source starting from
// parser->current.end. If the string extends beyond the end of the source,
// return the empty string "".
//
// When the string fits, the returned pointer refers directly into the source,
// so callers must read at most len bytes from it.
static inline const char *
peek_string(yp_parser_t *parser, size_t len) {
    // Compare lengths instead of computing parser->current.end + len: forming
    // a pointer beyond one past the end of the source buffer is undefined
    // behavior, and for a very large len the addition could wrap around and
    // make the bounds check pass incorrectly.
    ptrdiff_t remaining = parser->end - parser->current.end;

    if (remaining >= 0 && len <= (size_t) remaining) {
        return parser->current.end;
    } else {
        return "";
    }
}
// If the character to be read matches the given value, then returns true and // If the character to be read matches the given value, then returns true and
// advanced the current pointer. // advanced the current pointer.
static inline bool static inline bool
@ -4342,22 +4369,53 @@ match(yp_parser_t *parser, char value) {
return false; return false;
} }
// Find the next newline character within the first length bytes starting at
// cursor. Returns a pointer to that newline, or NULL if the range does not
// contain one. The length must not be negative.
static inline const char *
next_newline(const char *cursor, ptrdiff_t length) {
    assert(length >= 0);
    size_t span = (size_t) length;

    // Note that it's okay for us to use memchr here to look for \n because none
    // of the encodings that we support have \n as a component of a multi-byte
    // character.
    const char *newline = memchr(cursor, '\n', span);
    return newline;
}
// Locate the value of an encoding magic comment. Scans the remaining bytes
// starting at cursor for the word "coding" immediately followed by either ':'
// or '=', and returns a pointer to the byte just after that separator. Returns
// NULL if no such key is found. This is effectively an inlined strnstr that
// accepts two possible separators.
static inline const char *
parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
    assert(remaining >= 0);

    const size_t length = (size_t) remaining;
    const size_t key_length = strlen("coding:");
    const size_t word_length = key_length - 1;

    if (key_length > length) return NULL;

    // The last position at which the whole key could still fit is one before
    // this limit, so every candidate found below can be read in full.
    const char *cursor_limit = cursor + length - key_length + 1;

    for (;;) {
        cursor = yp_memchr(parser, cursor, 'c', (size_t) (cursor_limit - cursor));
        if (cursor == NULL) return NULL;

        char separator = cursor[word_length];
        bool separated = (separator == ':') || (separator == '=');

        if (separated && (strncmp(cursor, "coding", word_length) == 0)) {
            return cursor + key_length;
        }

        cursor++;
    }
}
// Here we're going to check if this is a "magic" comment, and perform whatever // Here we're going to check if this is a "magic" comment, and perform whatever
// actions are necessary for it here. // actions are necessary for it here.
static void static void
parser_lex_encoding_comment(yp_parser_t *parser) { parser_lex_encoding_comment(yp_parser_t *parser) {
const char *start = parser->current.start + 1; const char *start = parser->current.start + 1;
const char *end = memchr(start, '\n', (size_t) (parser->end - start)); const char *end = next_newline(start, parser->end - start);
if (end == NULL) end = parser->end; if (end == NULL) end = parser->end;
// These are the patterns we're going to match to find the encoding comment. // These are the patterns we're going to match to find the encoding comment.
// This is definitely not complete or even really correct. // This is definitely not complete or even really correct.
const char *encoding_start = NULL; const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
if ((encoding_start = yp_strnstr(start, "coding:", (size_t) (end - start))) != NULL) {
encoding_start += 7;
} else if ((encoding_start = yp_strnstr(start, "coding=", (size_t) (end - start))) != NULL) {
encoding_start += 7;
}
// If we didn't find anything that matched our patterns, then return. Note // If we didn't find anything that matched our patterns, then return. Note
// that this does a _very_ poor job of actually finding the encoding, and // that this does a _very_ poor job of actually finding the encoding, and
@ -4370,7 +4428,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
// Now determine the end of the encoding string. This is either the end of // Now determine the end of the encoding string. This is either the end of
// the line, the first whitespace character, or a punctuation mark. // the line, the first whitespace character, or a punctuation mark.
const char *encoding_end = yp_strpbrk(encoding_start, " \t\f\r\v\n;,", end - encoding_start); const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
encoding_end = encoding_end == NULL ? end : encoding_end; encoding_end = encoding_end == NULL ? end : encoding_end;
// Finally, we can determine the width of the encoding string. // Finally, we can determine the width of the encoding string.
@ -4392,7 +4450,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
// Extensions like utf-8 can contain extra encoding details like, // Extensions like utf-8 can contain extra encoding details like,
// utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
// treat any encoding starting utf-8 as utf-8. // treat any encoding starting utf-8 as utf-8.
if (strncasecmp(encoding_start, "utf-8", 5) == 0) { if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
// We don't need to do anything here because the default encoding is // We don't need to do anything here because the default encoding is
// already UTF-8. We'll just return. // already UTF-8. We'll just return.
return; return;
@ -4401,7 +4459,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
// Next, we're going to loop through each of the encodings that we handle // Next, we're going to loop through each of the encodings that we handle
// explicitly. If we found one that we understand, we'll use that value. // explicitly. If we found one that we understand, we'll use that value.
#define ENCODING(value, prebuilt) \ #define ENCODING(value, prebuilt) \
if (width == sizeof(value) - 1 && strncasecmp(encoding_start, value, sizeof(value) - 1) == 0) { \ if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
parser->encoding = prebuilt; \ parser->encoding = prebuilt; \
parser->encoding_changed |= true; \ parser->encoding_changed |= true; \
if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \ if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@ -4866,7 +4924,8 @@ static yp_token_type_t
lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_token_type_t type, yp_token_type_t modifier_type) { lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_token_type_t type, yp_token_type_t modifier_type) {
yp_lex_state_t last_state = parser->lex_state; yp_lex_state_t last_state = parser->lex_state;
if (strncmp(parser->current.start, value, strlen(value)) == 0) { const size_t vlen = strlen(value);
if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
if (parser->lex_state & YP_LEX_STATE_FNAME) { if (parser->lex_state & YP_LEX_STATE_FNAME) {
lex_state_set(parser, YP_LEX_STATE_ENDFN); lex_state_set(parser, YP_LEX_STATE_ENDFN);
} else { } else {
@ -5275,7 +5334,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
static yp_token_type_t static yp_token_type_t
lex_embdoc(yp_parser_t *parser) { lex_embdoc(yp_parser_t *parser) {
// First, lex out the EMBDOC_BEGIN token. // First, lex out the EMBDOC_BEGIN token.
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end)); const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
if (newline == NULL) { if (newline == NULL) {
parser->current.end = parser->end; parser->current.end = parser->end;
@ -5300,7 +5359,7 @@ lex_embdoc(yp_parser_t *parser) {
// token here. // token here.
if (strncmp(parser->current.end, "=end", 4) == 0 && if (strncmp(parser->current.end, "=end", 4) == 0 &&
(parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) { (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end)); const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
if (newline == NULL) { if (newline == NULL) {
parser->current.end = parser->end; parser->current.end = parser->end;
@ -5320,7 +5379,7 @@ lex_embdoc(yp_parser_t *parser) {
// Otherwise, we'll parse until the end of the line and return a line of // Otherwise, we'll parse until the end of the line and return a line of
// embedded documentation. // embedded documentation.
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end)); const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
if (newline == NULL) { if (newline == NULL) {
parser->current.end = parser->end; parser->current.end = parser->end;
@ -5466,9 +5525,9 @@ parser_lex(yp_parser_t *parser) {
LEX(YP_TOKEN_EOF); LEX(YP_TOKEN_EOF);
case '#': { // comments case '#': { // comments
const char *ending = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end)); const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
while (ending && ending < parser->end && *ending != '\n') { while (ending && ending < parser->end && *ending != '\n') {
ending = memchr(ending + 1, '\n', (size_t) (parser->end - ending)); ending = next_newline(ending + 1, parser->end - ending);
} }
parser->current.end = ending == NULL ? parser->end : ending + 1; parser->current.end = ending == NULL ? parser->end : ending + 1;
@ -5540,7 +5599,7 @@ parser_lex(yp_parser_t *parser) {
// Otherwise we'll return a regular newline. // Otherwise we'll return a regular newline.
if (next_content[0] == '#') { if (next_content[0] == '#') {
// Here we look for a "." or "&." following a "\n". // Here we look for a "." or "&." following a "\n".
const char *following = memchr(next_content, '\n', (size_t) (parser->end - next_content)); const char *following = next_newline(next_content, parser->end - next_content);
while (following && (following < parser->end)) { while (following && (following < parser->end)) {
following++; following++;
@ -5552,7 +5611,7 @@ parser_lex(yp_parser_t *parser) {
// If there is a comment, then we need to find the end of the // If there is a comment, then we need to find the end of the
// comment and continue searching from there. // comment and continue searching from there.
following = memchr(following, '\n', (size_t) (parser->end - following)); following = next_newline(following, parser->end - following);
} }
// If the lex state was ignored, or we hit a '.' or a '&.', // If the lex state was ignored, or we hit a '.' or a '&.',
@ -5785,7 +5844,7 @@ parser_lex(yp_parser_t *parser) {
// = => =~ == === =begin // = => =~ == === =begin
case '=': case '=':
if (current_token_starts_line(parser) && strncmp(parser->current.end, "begin", 5) == 0 && yp_char_is_whitespace(parser->current.end[5])) { if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
yp_token_type_t type = lex_embdoc(parser); yp_token_type_t type = lex_embdoc(parser);
if (type == YP_TOKEN_EOF) { if (type == YP_TOKEN_EOF) {
@ -5848,19 +5907,21 @@ parser_lex(yp_parser_t *parser) {
const char *ident_start = parser->current.end; const char *ident_start = parser->current.end;
size_t width = 0; size_t width = 0;
if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) { if (parser->current.end >= parser->end) {
parser->current.end = end;
} else if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
parser->current.end = end; parser->current.end = end;
} else { } else {
if (quote == YP_HEREDOC_QUOTE_NONE) { if (quote == YP_HEREDOC_QUOTE_NONE) {
parser->current.end += width; parser->current.end += width;
while ((width = char_is_identifier(parser, parser->current.end))) { while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
parser->current.end += width; parser->current.end += width;
} }
} else { } else {
// If we have quotes, then we're going to go until we find the // If we have quotes, then we're going to go until we find the
// end quote. // end quote.
while (parser->current.end < parser->end && quote != (yp_heredoc_quote_t) (*parser->current.end)) { while ((parser->current.end < parser->end) && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
parser->current.end++; parser->current.end++;
} }
} }
@ -5882,7 +5943,7 @@ parser_lex(yp_parser_t *parser) {
}); });
if (parser->heredoc_end == NULL) { if (parser->heredoc_end == NULL) {
const char *body_start = (const char *) memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end)); const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
if (body_start == NULL) { if (body_start == NULL) {
// If there is no newline after the heredoc identifier, then // If there is no newline after the heredoc identifier, then
@ -6465,13 +6526,13 @@ parser_lex(yp_parser_t *parser) {
// Here we'll get a list of the places where strpbrk should break, // Here we'll get a list of the places where strpbrk should break,
// and then find the first one. // and then find the first one.
const char *breakpoints = parser->lex_modes.current->as.list.breakpoints; const char *breakpoints = parser->lex_modes.current->as.list.breakpoints;
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end); const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) { while (breakpoint != NULL) {
switch (*breakpoint) { switch (*breakpoint) {
case '\0': case '\0':
// If we hit a null byte, skip directly past it. // If we hit a null byte, skip directly past it.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break; break;
case '\\': { case '\\': {
// If we hit escapes, then we need to treat the next token // If we hit escapes, then we need to treat the next token
@ -6492,7 +6553,7 @@ parser_lex(yp_parser_t *parser) {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
} }
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break; break;
} }
case ' ': case ' ':
@ -6517,7 +6578,7 @@ parser_lex(yp_parser_t *parser) {
// that looked like an interpolated class or instance variable // that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip // like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint. // to the next breakpoint.
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end); breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break; break;
} }
} }
@ -6526,7 +6587,7 @@ parser_lex(yp_parser_t *parser) {
if (*breakpoint == parser->lex_modes.current->as.list.incrementor) { if (*breakpoint == parser->lex_modes.current->as.list.incrementor) {
// If we've hit the incrementor, then we need to skip past it and // If we've hit the incrementor, then we need to skip past it and
// find the next breakpoint. // find the next breakpoint.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.list.nesting++; parser->lex_modes.current->as.list.nesting++;
break; break;
} }
@ -6537,7 +6598,7 @@ parser_lex(yp_parser_t *parser) {
// If this terminator doesn't actually close the list, then we need // If this terminator doesn't actually close the list, then we need
// to continue on past it. // to continue on past it.
if (parser->lex_modes.current->as.list.nesting > 0) { if (parser->lex_modes.current->as.list.nesting > 0) {
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.list.nesting--; parser->lex_modes.current->as.list.nesting--;
break; break;
} }
@ -6577,13 +6638,13 @@ parser_lex(yp_parser_t *parser) {
// regular expression. We'll use strpbrk to find the first of these // regular expression. We'll use strpbrk to find the first of these
// characters. // characters.
const char *breakpoints = parser->lex_modes.current->as.regexp.breakpoints; const char *breakpoints = parser->lex_modes.current->as.regexp.breakpoints;
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end); const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) { while (breakpoint != NULL) {
switch (*breakpoint) { switch (*breakpoint) {
case '\0': case '\0':
// If we hit a null byte, skip directly past it. // If we hit a null byte, skip directly past it.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break; break;
case '\\': { case '\\': {
// If we hit escapes, then we need to treat the next token // If we hit escapes, then we need to treat the next token
@ -6597,7 +6658,7 @@ parser_lex(yp_parser_t *parser) {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
} }
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break; break;
} }
case '#': { case '#': {
@ -6613,7 +6674,7 @@ parser_lex(yp_parser_t *parser) {
// that looked like an interpolated class or instance variable // that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip // like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint. // to the next breakpoint.
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end); breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break; break;
} }
} }
@ -6622,7 +6683,7 @@ parser_lex(yp_parser_t *parser) {
if (*breakpoint == parser->lex_modes.current->as.regexp.incrementor) { if (*breakpoint == parser->lex_modes.current->as.regexp.incrementor) {
// If we've hit the incrementor, then we need to skip past it and // If we've hit the incrementor, then we need to skip past it and
// find the next breakpoint. // find the next breakpoint.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.regexp.nesting++; parser->lex_modes.current->as.regexp.nesting++;
break; break;
} }
@ -6635,7 +6696,7 @@ parser_lex(yp_parser_t *parser) {
if (parser->lex_modes.current->as.regexp.terminator != '\n') { if (parser->lex_modes.current->as.regexp.terminator != '\n') {
// If the terminator is not a newline, then we // If the terminator is not a newline, then we
// can set the next breakpoint and continue. // can set the next breakpoint and continue.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break; break;
} }
@ -6646,7 +6707,7 @@ parser_lex(yp_parser_t *parser) {
assert(*breakpoint == parser->lex_modes.current->as.regexp.terminator); assert(*breakpoint == parser->lex_modes.current->as.regexp.terminator);
if (parser->lex_modes.current->as.regexp.nesting > 0) { if (parser->lex_modes.current->as.regexp.nesting > 0) {
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.regexp.nesting--; parser->lex_modes.current->as.regexp.nesting--;
break; break;
} }
@ -6694,7 +6755,7 @@ parser_lex(yp_parser_t *parser) {
// These are the places where we need to split up the content of the // These are the places where we need to split up the content of the
// string. We'll use strpbrk to find the first of these characters. // string. We'll use strpbrk to find the first of these characters.
const char *breakpoints = parser->lex_modes.current->as.string.breakpoints; const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end); const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) { while (breakpoint != NULL) {
// If we hit the incrementor, then we'll increment the nesting and // If we hit the incrementor, then we'll increment the nesting and
@ -6704,7 +6765,7 @@ parser_lex(yp_parser_t *parser) {
*breakpoint == parser->lex_modes.current->as.string.incrementor *breakpoint == parser->lex_modes.current->as.string.incrementor
) { ) {
parser->lex_modes.current->as.string.nesting++; parser->lex_modes.current->as.string.nesting++;
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
continue; continue;
} }
@ -6715,7 +6776,7 @@ parser_lex(yp_parser_t *parser) {
// If this terminator doesn't actually close the string, then we need // If this terminator doesn't actually close the string, then we need
// to continue on past it. // to continue on past it.
if (parser->lex_modes.current->as.string.nesting > 0) { if (parser->lex_modes.current->as.string.nesting > 0) {
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
parser->lex_modes.current->as.string.nesting--; parser->lex_modes.current->as.string.nesting--;
continue; continue;
} }
@ -6762,7 +6823,7 @@ parser_lex(yp_parser_t *parser) {
if (*breakpoint == '\n') { if (*breakpoint == '\n') {
if (parser->heredoc_end == NULL) { if (parser->heredoc_end == NULL) {
yp_newline_list_append(&parser->newline_list, breakpoint); yp_newline_list_append(&parser->newline_list, breakpoint);
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
continue; continue;
} else { } else {
parser->current.end = breakpoint + 1; parser->current.end = breakpoint + 1;
@ -6774,7 +6835,7 @@ parser_lex(yp_parser_t *parser) {
switch (*breakpoint) { switch (*breakpoint) {
case '\0': case '\0':
// Skip directly past the null character. // Skip directly past the null character.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break; break;
case '\\': { case '\\': {
// If we hit escapes, then we need to treat the next token // If we hit escapes, then we need to treat the next token
@ -6789,7 +6850,7 @@ parser_lex(yp_parser_t *parser) {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
} }
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
break; break;
} }
case '#': { case '#': {
@ -6802,7 +6863,7 @@ parser_lex(yp_parser_t *parser) {
// looked like an interpolated class or instance variable like "#@" // looked like an interpolated class or instance variable like "#@"
// but wasn't actually. In this case we'll just skip to the next // but wasn't actually. In this case we'll just skip to the next
// breakpoint. // breakpoint.
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end); breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break; break;
} }
default: default:
@ -6844,7 +6905,7 @@ parser_lex(yp_parser_t *parser) {
start += yp_strspn_inline_whitespace(start, parser->end - start); start += yp_strspn_inline_whitespace(start, parser->end - start);
} }
if (strncmp(start, ident_start, ident_length) == 0) { if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
bool matched = true; bool matched = true;
bool at_end = false; bool at_end = false;
@ -6888,13 +6949,13 @@ parser_lex(yp_parser_t *parser) {
breakpoints[2] = '\0'; breakpoints[2] = '\0';
} }
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end); const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
while (breakpoint != NULL) { while (breakpoint != NULL) {
switch (*breakpoint) { switch (*breakpoint) {
case '\0': case '\0':
// Skip directly past the null character. // Skip directly past the null character.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break; break;
case '\n': { case '\n': {
yp_newline_list_append(&parser->newline_list, breakpoint); yp_newline_list_append(&parser->newline_list, breakpoint);
@ -6939,7 +7000,7 @@ parser_lex(yp_parser_t *parser) {
// Otherwise we hit a newline and it wasn't followed by a // Otherwise we hit a newline and it wasn't followed by a
// terminator, so we can continue parsing. // terminator, so we can continue parsing.
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
break; break;
} }
case '\\': { case '\\': {
@ -6956,7 +7017,7 @@ parser_lex(yp_parser_t *parser) {
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
} }
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
} }
break; break;
} }
@ -6970,7 +7031,7 @@ parser_lex(yp_parser_t *parser) {
// that looked like an interpolated class or instance variable // that looked like an interpolated class or instance variable
// like "#@" but wasn't actually. In this case we'll just skip // like "#@" but wasn't actually. In this case we'll just skip
// to the next breakpoint. // to the next breakpoint.
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end); breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
break; break;
} }
default: default:
@ -7007,7 +7068,7 @@ yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_tok
ptrdiff_t length = content->end - content->start; ptrdiff_t length = content->end - content->start;
assert(length >= 0); assert(length >= 0);
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list); yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
return node; return node;
} }
@ -7018,7 +7079,7 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
ptrdiff_t length = content->end - content->start; ptrdiff_t length = content->end - content->start;
assert(length >= 0); assert(length >= 0);
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list); yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
return node; return node;
} }
@ -7029,7 +7090,7 @@ yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
ptrdiff_t length = content->end - content->start; ptrdiff_t length = content->end - content->start;
assert(length >= 0); assert(length >= 0);
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list); yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
return node; return node;
} }
@ -7040,7 +7101,7 @@ yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openi
ptrdiff_t length = content->end - content->start; ptrdiff_t length = content->end - content->start;
assert(length >= 0); assert(length >= 0);
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list); yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
return node; return node;
} }
@ -7505,10 +7566,10 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
// the previous method name in, and append an =. // the previous method name in, and append an =.
size_t length = yp_string_length(&call->name); size_t length = yp_string_length(&call->name);
char *name = malloc(length + 2); char *name = calloc(length + 2, sizeof(char));
if (name == NULL) return NULL; if (name == NULL) return NULL;
sprintf(name, "%.*s=", (int) length, yp_string_source(&call->name)); yp_snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
// Now switch the name to the new string. // Now switch the name to the new string.
yp_string_free(&call->name); yp_string_free(&call->name);
@ -8954,9 +9015,11 @@ parse_string_part(yp_parser_t *parser) {
static yp_node_t * static yp_node_t *
parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) { parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
bool lex_string = lex_mode->mode == YP_LEX_STRING;
bool lex_interpolation = lex_string && lex_mode->as.string.interpolation;
yp_token_t opening = parser->previous; yp_token_t opening = parser->previous;
if (lex_mode->mode != YP_LEX_STRING) { if (!lex_string) {
if (next_state != YP_LEX_STATE_NONE) { if (next_state != YP_LEX_STATE_NONE) {
lex_state_set(parser, next_state); lex_state_set(parser, next_state);
} }
@ -8990,9 +9053,9 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
} }
// If we weren't in a string in the previous check then we have to be now. // If we weren't in a string in the previous check then we have to be now.
assert(lex_mode->mode == YP_LEX_STRING); assert(lex_string);
if (lex_mode->as.string.interpolation) { if (lex_interpolation) {
yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &opening); yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
@ -9043,9 +9106,10 @@ parse_undef_argument(yp_parser_t *parser) {
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL); return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
} }
case YP_TOKEN_SYMBOL_BEGIN: { case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t *lex_mode = parser->lex_modes.current; yp_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser); parser_lex(parser);
return parse_symbol(parser, lex_mode, YP_LEX_STATE_NONE);
return parse_symbol(parser, &lex_mode, YP_LEX_STATE_NONE);
} }
default: default:
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a bare word or symbol argument."); yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a bare word or symbol argument.");
@ -9075,10 +9139,10 @@ parse_alias_argument(yp_parser_t *parser, bool first) {
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL); return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
} }
case YP_TOKEN_SYMBOL_BEGIN: { case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t *lex_mode = parser->lex_modes.current; yp_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser); parser_lex(parser);
return parse_symbol(parser, lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE); return parse_symbol(parser, &lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
} }
case YP_TOKEN_BACK_REFERENCE: case YP_TOKEN_BACK_REFERENCE:
parser_lex(parser); parser_lex(parser);
@ -9177,7 +9241,7 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
common_whitespace = cur_whitespace; common_whitespace = cur_whitespace;
} }
cur_char = memchr(cur_char + 1, '\n', (size_t) (parser->end - (cur_char + 1))); cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
if (cur_char) cur_char++; if (cur_char) cur_char++;
} }
} }
@ -9252,7 +9316,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
// At this point we have dedented all that we need to, so we need to find // At this point we have dedented all that we need to, so we need to find
// the next newline. // the next newline.
const char *breakpoint = memchr(source_cursor, '\n', (size_t) (source_end - source_cursor)); const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);
if (breakpoint == NULL) { if (breakpoint == NULL) {
// If there isn't another newline, then we can just move the rest of the // If there isn't another newline, then we can just move the rest of the
@ -9293,92 +9357,106 @@ parse_pattern_constant_path(yp_parser_t *parser, yp_node_t *node) {
// If there is a [ or ( that follows, then this is part of a larger pattern // If there is a [ or ( that follows, then this is part of a larger pattern
// expression. We'll parse the inner pattern here, then modify the returned // expression. We'll parse the inner pattern here, then modify the returned
// inner pattern with our constant path attached. // inner pattern with our constant path attached.
if (match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) { if (!match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
yp_token_t opening; return node;
yp_token_t closing;
yp_node_t *inner = NULL;
if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
opening = parser->previous;
accept(parser, YP_TOKEN_NEWLINE);
if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
}
closing = parser->previous;
} else {
parser_lex(parser);
opening = parser->previous;
if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
}
closing = parser->previous;
}
if (inner) {
// Now that we have the inner pattern, check to see if it's an array, find,
// or hash pattern. If it is, then we'll attach our constant path to it. If
// it's not, then we'll create an array pattern.
switch (inner->type) {
case YP_NODE_ARRAY_PATTERN_NODE: {
yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *)inner;
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
node = (yp_node_t *)pattern_node;
break;
}
case YP_NODE_FIND_PATTERN_NODE: {
yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
node = (yp_node_t *) pattern_node;
break;
}
case YP_NODE_HASH_PATTERN_NODE: {
yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *)inner;
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
node = (yp_node_t *) pattern_node;
break;
}
default: {
yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
yp_array_pattern_node_requireds_append(pattern_node, inner);
node = (yp_node_t *)pattern_node;
break;
}
}
} else {
// If there was no inner pattern, then we have something like Foo() or
// Foo[]. In that case we'll create an array pattern with no requireds.
node = (yp_node_t *)yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
}
} }
return node; yp_token_t opening;
yp_token_t closing;
yp_node_t *inner = NULL;
if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
opening = parser->previous;
accept(parser, YP_TOKEN_NEWLINE);
if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
}
closing = parser->previous;
} else {
parser_lex(parser);
opening = parser->previous;
if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
}
closing = parser->previous;
}
if (!inner) {
// If there was no inner pattern, then we have something like Foo() or
// Foo[]. In that case we'll create an array pattern with no requireds.
return (yp_node_t *) yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
}
// Now that we have the inner pattern, check to see if it's an array, find,
// or hash pattern. If it is, then we'll attach our constant path to it if
// it doesn't already have a constant. If it's not one of those node types
// or it does have a constant, then we'll create an array pattern.
switch (inner->type) {
case YP_NODE_ARRAY_PATTERN_NODE: {
yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner;
if (pattern_node->constant == NULL) {
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
return (yp_node_t *) pattern_node;
}
break;
}
case YP_NODE_FIND_PATTERN_NODE: {
yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
if (pattern_node->constant == NULL) {
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
return (yp_node_t *) pattern_node;
}
break;
}
case YP_NODE_HASH_PATTERN_NODE: {
yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *) inner;
if (pattern_node->constant == NULL) {
pattern_node->base.location.start = node->location.start;
pattern_node->base.location.end = closing.end;
pattern_node->constant = node;
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
return (yp_node_t *) pattern_node;
}
break;
}
default:
break;
}
// If we got here, then we didn't return one of the inner patterns by
// attaching its constant. In this case we'll create an array pattern and
// attach our constant to it.
yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
yp_array_pattern_node_requireds_append(pattern_node, inner);
return (yp_node_t *) pattern_node;
} }
// Parse a rest pattern. // Parse a rest pattern.
@ -9897,8 +9975,6 @@ parse_pattern(yp_parser_t *parser, bool top_pattern, const char *message) {
// Parse an expression that begins with the previous node that we just lexed. // Parse an expression that begins with the previous node that we just lexed.
static inline yp_node_t * static inline yp_node_t *
parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
switch (parser->current.type) { switch (parser->current.type) {
case YP_TOKEN_BRACKET_LEFT_ARRAY: { case YP_TOKEN_BRACKET_LEFT_ARRAY: {
parser_lex(parser); parser_lex(parser);
@ -11015,7 +11091,10 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM); lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM);
parser_lex(parser); parser_lex(parser);
name = parse_undef_argument(parser); name = parse_undef_argument(parser);
if (name->type == YP_NODE_MISSING_NODE) break; if (name->type == YP_NODE_MISSING_NODE) {
yp_node_destroy(parser, name);
break;
}
yp_undef_node_append(undef, name); yp_undef_node_append(undef, name);
} }
@ -11043,6 +11122,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
receiver = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected expression after `not`."); receiver = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected expression after `not`.");
if (!parser->recovering) { if (!parser->recovering) {
accept(parser, YP_TOKEN_NEWLINE);
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after 'not' expression."); expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after 'not' expression.");
arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end }); arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
} }
@ -11727,9 +11807,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
return (yp_node_t *) node; return (yp_node_t *) node;
} }
case YP_TOKEN_STRING_BEGIN: { case YP_TOKEN_STRING_BEGIN: {
assert(parser->lex_modes.current->mode == YP_LEX_STRING);
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
yp_token_t opening = parser->current;
parser_lex(parser); parser_lex(parser);
yp_token_t opening = parser->previous;
yp_node_t *node; yp_node_t *node;
if (accept(parser, YP_TOKEN_STRING_END)) { if (accept(parser, YP_TOKEN_STRING_END)) {
@ -11754,7 +11837,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
}; };
return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous); return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
} else if (!lex_mode->as.string.interpolation) { } else if (!lex_interpolation) {
// If we don't accept interpolation then we expect the string to start // If we don't accept interpolation then we expect the string to start
// with a single string content node. // with a single string content node.
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter."); expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
@ -11858,9 +11941,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
return node; return node;
} }
} }
case YP_TOKEN_SYMBOL_BEGIN: case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
parser_lex(parser); parser_lex(parser);
return parse_symbol(parser, lex_mode, YP_LEX_STATE_END);
return parse_symbol(parser, &lex_mode, YP_LEX_STATE_END);
}
default: default:
if (context_recoverable(parser, &parser->current)) { if (context_recoverable(parser, &parser->current)) {
parser->recovering = true; parser->recovering = true;
@ -12482,82 +12568,8 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
return path; return path;
} }
case YP_TOKEN_AMPERSAND: case YP_CASE_OPERATOR:
case YP_TOKEN_BACKTICK: case YP_CASE_KEYWORD:
case YP_TOKEN_BANG:
case YP_TOKEN_BANG_EQUAL:
case YP_TOKEN_BANG_TILDE:
case YP_TOKEN_CARET:
case YP_TOKEN_EQUAL_EQUAL:
case YP_TOKEN_EQUAL_EQUAL_EQUAL:
case YP_TOKEN_EQUAL_TILDE:
case YP_TOKEN_GREATER:
case YP_TOKEN_GREATER_EQUAL:
case YP_TOKEN_GREATER_GREATER:
case YP_TOKEN_HEREDOC_START:
case YP_TOKEN_IGNORED_NEWLINE:
case YP_TOKEN_KEYWORD_ALIAS:
case YP_TOKEN_KEYWORD_AND:
case YP_TOKEN_KEYWORD_BEGIN:
case YP_TOKEN_KEYWORD_BEGIN_UPCASE:
case YP_TOKEN_KEYWORD_BREAK:
case YP_TOKEN_KEYWORD_CASE:
case YP_TOKEN_KEYWORD_CLASS:
case YP_TOKEN_KEYWORD_DEF:
case YP_TOKEN_KEYWORD_DEFINED:
case YP_TOKEN_KEYWORD_DO:
case YP_TOKEN_KEYWORD_ELSE:
case YP_TOKEN_KEYWORD_ELSIF:
case YP_TOKEN_KEYWORD_END:
case YP_TOKEN_KEYWORD_END_UPCASE:
case YP_TOKEN_KEYWORD_ENSURE:
case YP_TOKEN_KEYWORD_FALSE:
case YP_TOKEN_KEYWORD_FOR:
case YP_TOKEN_KEYWORD_IF:
case YP_TOKEN_KEYWORD_IN:
case YP_TOKEN_KEYWORD_NEXT:
case YP_TOKEN_KEYWORD_NIL:
case YP_TOKEN_KEYWORD_NOT:
case YP_TOKEN_KEYWORD_OR:
case YP_TOKEN_KEYWORD_REDO:
case YP_TOKEN_KEYWORD_RESCUE:
case YP_TOKEN_KEYWORD_RETRY:
case YP_TOKEN_KEYWORD_RETURN:
case YP_TOKEN_KEYWORD_SELF:
case YP_TOKEN_KEYWORD_SUPER:
case YP_TOKEN_KEYWORD_THEN:
case YP_TOKEN_KEYWORD_TRUE:
case YP_TOKEN_KEYWORD_UNDEF:
case YP_TOKEN_KEYWORD_UNLESS:
case YP_TOKEN_KEYWORD_UNTIL:
case YP_TOKEN_KEYWORD_WHEN:
case YP_TOKEN_KEYWORD_WHILE:
case YP_TOKEN_KEYWORD_YIELD:
case YP_TOKEN_KEYWORD___ENCODING__:
case YP_TOKEN_KEYWORD___FILE__:
case YP_TOKEN_KEYWORD___LINE__:
case YP_TOKEN_LESS:
case YP_TOKEN_LESS_EQUAL:
case YP_TOKEN_LESS_EQUAL_GREATER:
case YP_TOKEN_LESS_LESS:
case YP_TOKEN_MINUS:
case YP_TOKEN_PERCENT:
case YP_TOKEN_PERCENT_LOWER_I:
case YP_TOKEN_PERCENT_LOWER_W:
case YP_TOKEN_PERCENT_LOWER_X:
case YP_TOKEN_PERCENT_UPPER_I:
case YP_TOKEN_PERCENT_UPPER_W:
case YP_TOKEN_PIPE:
case YP_TOKEN_PLUS:
case YP_TOKEN_REGEXP_BEGIN:
case YP_TOKEN_SLASH:
case YP_TOKEN_STAR:
case YP_TOKEN_STAR_STAR:
case YP_TOKEN_TILDE:
case YP_TOKEN_UCOLON_COLON:
case YP_TOKEN_UDOT_DOT:
case YP_TOKEN_UDOT_DOT_DOT:
case YP_TOKEN___END__:
case YP_TOKEN_IDENTIFIER: { case YP_TOKEN_IDENTIFIER: {
parser_lex(parser); parser_lex(parser);
@ -12805,7 +12817,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
} else if (size >= 2 && source[0] == '#' && source[1] == '!') { } else if (size >= 2 && source[0] == '#' && source[1] == '!') {
// If the first two bytes of the source are a shebang, then we'll indicate // If the first two bytes of the source are a shebang, then we'll indicate
// that the encoding comment is at the end of the shebang. // that the encoding comment is at the end of the shebang.
const char *encoding_comment_start = memchr(source, '\n', size); const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
if (encoding_comment_start) { if (encoding_comment_start) {
parser->encoding_comment_start = encoding_comment_start + 1; parser->encoding_comment_start = encoding_comment_start + 1;
} }
@ -12891,6 +12903,3 @@ yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
#undef YP_CASE_KEYWORD #undef YP_CASE_KEYWORD
#undef YP_CASE_OPERATOR #undef YP_CASE_OPERATOR
#undef YP_CASE_WRITABLE #undef YP_CASE_WRITABLE
#undef YP_STRINGIZE
#undef YP_STRINGIZE0
#undef YP_VERSION_MACRO

View File

@ -2,19 +2,6 @@
#define YARP_H #define YARP_H
#include "yarp/defines.h" #include "yarp/defines.h"
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef _WIN32
#include <strings.h>
#endif
#include "yarp/missing.h"
#include "yarp/ast.h" #include "yarp/ast.h"
#include "yarp/diagnostic.h" #include "yarp/diagnostic.h"
#include "yarp/node.h" #include "yarp/node.h"
@ -24,17 +11,26 @@
#include "yarp/unescape.h" #include "yarp/unescape.h"
#include "yarp/util/yp_buffer.h" #include "yarp/util/yp_buffer.h"
#include "yarp/util/yp_char.h" #include "yarp/util/yp_char.h"
#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_strpbrk.h" #include "yarp/util/yp_strpbrk.h"
#define YP_VERSION_MAJOR 0 #include <assert.h>
#define YP_VERSION_MINOR 4 #include <stdarg.h>
#define YP_VERSION_PATCH 0 #include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef _WIN32
#include <strings.h>
#endif
void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer); void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
void yp_print_node(yp_parser_t *parser, yp_node_t *node); void yp_print_node(yp_parser_t *parser, yp_node_t *node);
// Returns the YARP version and notably the serialization format // The YARP version and the serialization format.
YP_EXPORTED_FUNCTION const char * yp_version(void); YP_EXPORTED_FUNCTION const char * yp_version(void);
// Initialize a parser with the given start and end pointers. // Initialize a parser with the given start and end pointers.
@ -57,20 +53,6 @@ YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
// Parse the Ruby source associated with the given parser and return the tree. // Parse the Ruby source associated with the given parser and return the tree.
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser); YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
// Deallocate a node and all of its children.
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
// This struct stores the information gathered by the yp_node_memsize function.
// It contains both the memory footprint and additionally metadata about the
// shape of the tree.
typedef struct {
size_t memsize;
size_t node_count;
} yp_memsize_t;
// Calculates the memory footprint of a given node.
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
// Pretty-prints the AST represented by the given node to the given buffer. // Pretty-prints the AST represented by the given node to the given buffer.
YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer); YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);