Manual YARP resync
This commit is contained in:
parent
6f9d1b4b0f
commit
bfb933371d
Notes:
git
2023-07-05 20:59:16 +00:00
162
lib/yarp.rb
162
lib/yarp.rb
@ -1,17 +1,79 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module YARP
|
||||
# This represents a location in the source corresponding to a node or token.
|
||||
class Location
|
||||
attr_reader :start_offset, :length
|
||||
# This represents a source of Ruby code that has been parsed. It is used in
|
||||
# conjunction with locations to allow them to resolve line numbers and source
|
||||
# ranges.
|
||||
class Source
|
||||
attr_reader :source, :offsets
|
||||
|
||||
def initialize(start_offset, length)
|
||||
# Wrap a string of Ruby code together with its line start offsets.
#
# source  - the String of Ruby code.
# offsets - an Array of byte offsets that #line binary searches and #column
#           subtracts from; entry i is used as the start of line i + 1.
def initialize(source, offsets)
  @source = source
  @offsets = offsets
end
|
||||
|
||||
# Return the +length+ bytes of the source that start at byte +offset+.
def slice(offset, length)
  source.byteslice(offset, length)
end
|
||||
|
||||
# Return the line number for the byte offset +value+: the index of the first
# entry in offsets that is greater than +value+. When no entry is greater
# (the offset is on or after the last recorded line start), offsets.length is
# returned instead.
def line(value)
  offsets.bsearch_index { |offset| offset > value } || offsets.length
end
|
||||
|
||||
# Return the column of the byte offset +value+, measured in bytes from the
# start offset of the line that #line reports for it.
def column(value)
  value - offsets[line(value) - 1]
end
|
||||
end
|
||||
|
||||
# This represents a location in the source.
|
||||
class Location
|
||||
# A Source object that is used to determine more information from the given
|
||||
# offset and length.
|
||||
private attr_reader :source
|
||||
|
||||
# The byte offset from the beginning of the source where this location
|
||||
# starts.
|
||||
attr_reader :start_offset
|
||||
|
||||
# The length of this location in bytes.
|
||||
attr_reader :length
|
||||
|
||||
# Create a location.
#
# source       - the object later asked for slice, line and column
#                information about this location.
# start_offset - the byte offset from the beginning of the source where
#                this location starts.
# length       - the length of this location in bytes.
def initialize(source, start_offset, length)
  @source = source
  @start_offset = start_offset
  @length = length
end
|
||||
|
||||
# The source code that this location represents, obtained by asking the
# source for +length+ bytes starting at +start_offset+.
def slice
  source.slice(start_offset, length)
end
|
||||
|
||||
# The byte offset from the beginning of the source where this location ends
# (exclusive): the start offset plus the length.
def end_offset
  start_offset + length
end
|
||||
|
||||
# The line number where this location starts, as reported by the source for
# the start offset.
def start_line
  source.line(start_offset)
end
|
||||
|
||||
# The line number where this location ends, i.e. the line of its last byte.
# An empty location has no last byte, so it reports the line of its start
# offset instead of the byte before it (which could be on the previous line,
# or before the beginning of the source).
def end_line
  source.line([end_offset - 1, start_offset].max)
end
|
||||
|
||||
# The column number in bytes where this location starts from the start of
# the line, as reported by the source for the start offset.
def start_column
  source.column(start_offset)
end
|
||||
|
||||
# The column number in bytes where this location ends from the start of the
# line, i.e. the column of its last byte. An empty location has no last
# byte, so it reports the column of its start offset instead of the byte
# before it.
def end_column
  source.column([end_offset - 1, start_offset].max)
end
|
||||
|
||||
def deconstruct_keys(keys)
|
||||
@ -101,21 +163,12 @@ module YARP
|
||||
|
||||
# This represents a token from the Ruby source.
|
||||
class Token
|
||||
attr_reader :type, :value, :start_offset, :length
|
||||
attr_reader :type, :value, :location
|
||||
|
||||
def initialize(type, value, start_offset, length)
|
||||
def initialize(type, value, location)
|
||||
@type = type
|
||||
@value = value
|
||||
@start_offset = start_offset
|
||||
@length = length
|
||||
end
|
||||
|
||||
def end_offset
|
||||
@start_offset + @length
|
||||
end
|
||||
|
||||
def location
|
||||
Location.new(@start_offset, @length)
|
||||
@location = location
|
||||
end
|
||||
|
||||
def deconstruct_keys(keys)
|
||||
@ -143,20 +196,12 @@ module YARP
|
||||
|
||||
# This represents a node in the tree.
|
||||
class Node
|
||||
attr_reader :start_offset, :length
|
||||
|
||||
def end_offset
|
||||
@start_offset + @length
|
||||
end
|
||||
|
||||
def location
|
||||
Location.new(@start_offset, @length)
|
||||
end
|
||||
attr_reader :location
|
||||
|
||||
def pretty_print(q)
|
||||
q.group do
|
||||
q.text(self.class.name.split("::").last)
|
||||
self.location.pretty_print(q)
|
||||
location.pretty_print(q)
|
||||
q.text("(")
|
||||
q.nest(2) do
|
||||
deconstructed = deconstruct_keys([])
|
||||
@ -171,67 +216,10 @@ module YARP
|
||||
end
|
||||
end
|
||||
|
||||
# A class that knows how to walk down the tree. None of the individual visit
# methods are implemented on this visitor, so it forces the consumer to
# implement each one that they need. For a default implementation that
# continues walking the tree, see the Visitor class.
class BasicVisitor
  # Visit a single node by calling its accept method with this visitor.
  # Returns nil when the node is nil.
  def visit(node)
    node&.accept(self)
  end

  # Visit every node in the list and return the results in the same order.
  def visit_all(nodes)
    nodes.map { |node| visit(node) }
  end

  # Visit every entry of the given node's child_nodes list.
  def visit_child_nodes(node)
    visit_all(node.child_nodes)
  end
end
|
||||
|
||||
# This lexes with the Ripper lex. It drops any space events but otherwise
# returns the same tokens, except that two kinds of adjacent tokens are
# merged into the previously kept token:
#
# * an :on_tstring_content token whose text starts with "#$" or "#@" that
#   follows another :on_tstring_content token
# * an :on_words_sep token that follows another :on_words_sep token
#
# Raises SyntaxError if the syntax in source is invalid.
def self.lex_ripper(source)
  previous = []
  results = []

  Ripper.lex(source, raise_errors: true).each do |token|
    event = token[1]

    # Space events are dropped without becoming the "previous" token.
    next if event == :on_sp

    mergeable =
      case event
      when :on_tstring_content
        previous[1] == :on_tstring_content && token[2].start_with?("\#$", "\#@")
      when :on_words_sep
        previous[1] == :on_words_sep
      else
        false
      end

    if mergeable
      # Append in place so the token already stored in results grows.
      previous[2] << token[2]
    else
      results << token
      previous = token
    end
  end

  results
end
|
||||
|
||||
# Load the serialized AST using the source as a reference into a tree. This
# delegates to Serialize.load with the same arguments.
def self.load(source, serialized)
  Serialize.load(source, serialized)
end
|
||||
|
||||
# Parse the given source by forwarding it, together with the optional
# filepath (nil when omitted), to _parse.
def self.parse(source, filepath = nil)
  _parse(source, filepath)
end
|
||||
end
|
||||
|
||||
require_relative "yarp/lex_compat"
|
||||
@ -240,9 +228,3 @@ require_relative "yarp/ripper_compat"
|
||||
require_relative "yarp/serialize"
|
||||
require_relative "yarp/pack"
|
||||
require "yarp.so"
|
||||
|
||||
module YARP
|
||||
class << self
|
||||
private :_parse
|
||||
end
|
||||
end
|
||||
|
@ -534,12 +534,11 @@ module YARP
|
||||
end
|
||||
end
|
||||
|
||||
attr_reader :source, :offsets, :filepath
|
||||
attr_reader :source, :filepath
|
||||
|
||||
def initialize(source, filepath = "")
|
||||
@source = source
|
||||
@filepath = filepath || ""
|
||||
@offsets = find_offsets(source)
|
||||
end
|
||||
|
||||
def result
|
||||
@ -561,7 +560,8 @@ module YARP
|
||||
result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
|
||||
|
||||
result_value.each_with_index do |(token, lex_state), index|
|
||||
(lineno, column) = find_location(token.location.start_offset)
|
||||
lineno = token.location.start_line
|
||||
column = token.location.start_column
|
||||
column -= index == 0 ? 6 : 3 if bom && lineno == 1
|
||||
|
||||
event = RIPPER.fetch(token.type)
|
||||
@ -702,38 +702,6 @@ module YARP
|
||||
|
||||
ParseResult.new(tokens, result.comments, result.errors, result.warnings)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# YARP keeps locations around in the form of ranges of byte offsets from the
# start of the file. Ripper keeps locations around in the form of line and
# column numbers. To match the output, we keep a cache of the offsets at the
# beginning of each line.
#
# The returned array starts with 0 and gains one entry per line of the
# source, each being the previous entry plus that line's size in bytes.
def find_offsets(source)
  offsets = [0]
  source.each_line { |line| offsets << offsets.last + line.bytesize }
  offsets
end
|
||||
|
||||
# Given a byte offset, find the line number and column number that it maps
# to. We use a binary search over the cached offsets to find the line number
# that the offset is on, and then subtract the offset of the previous line
# to find the column number.
#
# Returns a two element array of [line number, column]. When no cached
# offset is greater than the value, the line is offsets.length - 1 and the
# column is measured from the last cached offset.
def find_location(value)
  line_number = offsets.bsearch_index { |offset| offset > value }
  return [offsets.length - 1, value - offsets.last] unless line_number

  [line_number, value - offsets[line_number - 1]]
end
|
||||
end
|
||||
|
||||
# The constant that wraps the behavior of the lexer to match Ripper's output
|
||||
@ -746,4 +714,39 @@ module YARP
|
||||
# Build a LexCompat for the given source and filepath (an empty string when
# omitted) and return its result.
def self.lex_compat(source, filepath = "")
  LexCompat.new(source, filepath).result
end
|
||||
|
||||
# This lexes with the Ripper lex. It drops any space events but otherwise
# returns the same tokens, except that two kinds of adjacent tokens are
# merged into the previously kept token:
#
# * an :on_tstring_content token whose text starts with "#$" or "#@" that
#   follows another :on_tstring_content token
# * an :on_words_sep token that follows another :on_words_sep token
#
# Raises SyntaxError if the syntax in source is invalid.
def self.lex_ripper(source)
  previous = []
  results = []

  Ripper.lex(source, raise_errors: true).each do |token|
    event = token[1]

    # Space events are dropped without becoming the "previous" token.
    next if event == :on_sp

    mergeable =
      case event
      when :on_tstring_content
        previous[1] == :on_tstring_content && token[2].start_with?("\#$", "\#@")
      when :on_words_sep
        previous[1] == :on_words_sep
      else
        false
      end

    if mergeable
      # Append in place so the token already stored in results grows.
      previous[2] << token[2]
    else
      results << token
      previous = token
    end
  end

  results
end
|
||||
end
|
||||
|
2283
lib/yarp/node.rb
2283
lib/yarp/node.rb
File diff suppressed because it is too large
Load Diff
@ -9,26 +9,30 @@ require "stringio"
|
||||
|
||||
module YARP
|
||||
module Serialize
|
||||
def self.load(source, serialized)
|
||||
def self.load(input, serialized)
|
||||
io = StringIO.new(serialized)
|
||||
io.set_encoding(Encoding::BINARY)
|
||||
|
||||
Loader.new(source, serialized, io).load
|
||||
Loader.new(input, serialized, io).load
|
||||
end
|
||||
|
||||
class Loader
|
||||
attr_reader :encoding, :source, :serialized, :io
|
||||
attr_reader :constant_pool_offset, :constant_pool
|
||||
attr_reader :encoding, :input, :serialized, :io
|
||||
attr_reader :constant_pool_offset, :constant_pool, :source
|
||||
|
||||
def initialize(source, serialized, io)
|
||||
def initialize(input, serialized, io)
|
||||
@encoding = Encoding::UTF_8
|
||||
|
||||
@source = source.dup
|
||||
@input = input.dup
|
||||
@serialized = serialized
|
||||
@io = io
|
||||
|
||||
@constant_pool_offset = nil
|
||||
@constant_pool = nil
|
||||
|
||||
offsets = [0]
|
||||
input.b.scan("\n") { offsets << $~.end(0) }
|
||||
@source = Source.new(input, offsets)
|
||||
end
|
||||
|
||||
def load
|
||||
@ -36,7 +40,7 @@ module YARP
|
||||
io.read(3).unpack("C3") => [0, 4, 0]
|
||||
|
||||
@encoding = Encoding.find(io.read(load_varint))
|
||||
@source = source.force_encoding(@encoding).freeze
|
||||
@input = input.force_encoding(@encoding).freeze
|
||||
|
||||
@constant_pool_offset = io.read(4).unpack1("L")
|
||||
@constant_pool = Array.new(load_varint, nil)
|
||||
@ -78,7 +82,7 @@ module YARP
|
||||
end
|
||||
|
||||
def load_location
|
||||
Location.new(load_varint, load_varint)
|
||||
Location.new(source, load_varint, load_varint)
|
||||
end
|
||||
|
||||
def load_optional_location
|
||||
@ -95,7 +99,7 @@ module YARP
|
||||
start = serialized.unpack1("L", offset: offset)
|
||||
length = serialized.unpack1("L", offset: offset + 4)
|
||||
|
||||
constant = source.byteslice(start, length).to_sym
|
||||
constant = input.byteslice(start, length).to_sym
|
||||
constant_pool[index] = constant
|
||||
end
|
||||
|
||||
@ -104,262 +108,262 @@ module YARP
|
||||
|
||||
def load_node
|
||||
type = io.getbyte
|
||||
start_offset, length = load_varint, load_varint
|
||||
location = load_location
|
||||
|
||||
case type
|
||||
when 1 then
|
||||
AliasNode.new(load_node, load_node, load_location, start_offset, length)
|
||||
AliasNode.new(load_node, load_node, load_location, location)
|
||||
when 2 then
|
||||
AlternationPatternNode.new(load_node, load_node, load_location, start_offset, length)
|
||||
AlternationPatternNode.new(load_node, load_node, load_location, location)
|
||||
when 3 then
|
||||
AndNode.new(load_node, load_node, load_location, start_offset, length)
|
||||
AndNode.new(load_node, load_node, load_location, location)
|
||||
when 4 then
|
||||
ArgumentsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
|
||||
ArgumentsNode.new(Array.new(load_varint) { load_node }, location)
|
||||
when 5 then
|
||||
ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
|
||||
ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
|
||||
when 6 then
|
||||
ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
|
||||
ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
|
||||
when 7 then
|
||||
AssocNode.new(load_node, load_optional_node, load_optional_location, start_offset, length)
|
||||
AssocNode.new(load_node, load_optional_node, load_optional_location, location)
|
||||
when 8 then
|
||||
AssocSplatNode.new(load_optional_node, load_location, start_offset, length)
|
||||
AssocSplatNode.new(load_optional_node, load_location, location)
|
||||
when 9 then
|
||||
BackReferenceReadNode.new(start_offset, length)
|
||||
BackReferenceReadNode.new(location)
|
||||
when 10 then
|
||||
BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
|
||||
BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, location)
|
||||
when 11 then
|
||||
BlockArgumentNode.new(load_optional_node, load_location, start_offset, length)
|
||||
BlockArgumentNode.new(load_optional_node, load_location, location)
|
||||
when 12 then
|
||||
BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, start_offset, length)
|
||||
BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, location)
|
||||
when 13 then
|
||||
BlockParameterNode.new(load_optional_location, load_location, start_offset, length)
|
||||
BlockParameterNode.new(load_optional_location, load_location, location)
|
||||
when 14 then
|
||||
BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, start_offset, length)
|
||||
BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, location)
|
||||
when 15 then
|
||||
BreakNode.new(load_optional_node, load_location, start_offset, length)
|
||||
BreakNode.new(load_optional_node, load_location, location)
|
||||
when 16 then
|
||||
CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, start_offset, length)
|
||||
CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, location)
|
||||
when 17 then
|
||||
CallOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
|
||||
CallOperatorAndWriteNode.new(load_node, load_location, load_node, location)
|
||||
when 18 then
|
||||
CallOperatorOrWriteNode.new(load_node, load_node, load_location, start_offset, length)
|
||||
CallOperatorOrWriteNode.new(load_node, load_node, load_location, location)
|
||||
when 19 then
|
||||
CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
|
||||
CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
|
||||
when 20 then
|
||||
CapturePatternNode.new(load_node, load_node, load_location, start_offset, length)
|
||||
CapturePatternNode.new(load_node, load_node, load_location, location)
|
||||
when 21 then
|
||||
CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, start_offset, length)
|
||||
CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, location)
|
||||
when 22 then
|
||||
ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, start_offset, length)
|
||||
ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, location)
|
||||
when 23 then
|
||||
ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
|
||||
ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
|
||||
when 24 then
|
||||
ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
|
||||
ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
|
||||
when 25 then
|
||||
ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
|
||||
ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
|
||||
when 26 then
|
||||
ClassVariableReadNode.new(start_offset, length)
|
||||
ClassVariableReadNode.new(location)
|
||||
when 27 then
|
||||
ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
|
||||
ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
|
||||
when 28 then
|
||||
ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
|
||||
ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, location)
|
||||
when 29 then
|
||||
ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
|
||||
ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, location)
|
||||
when 30 then
|
||||
ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
|
||||
ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
|
||||
when 31 then
|
||||
ConstantPathNode.new(load_optional_node, load_node, load_location, start_offset, length)
|
||||
ConstantPathNode.new(load_optional_node, load_node, load_location, location)
|
||||
when 32 then
|
||||
ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
|
||||
ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, location)
|
||||
when 33 then
|
||||
ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, start_offset, length)
|
||||
ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, location)
|
||||
when 34 then
|
||||
ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
|
||||
ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
|
||||
when 35 then
|
||||
ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, start_offset, length)
|
||||
ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, location)
|
||||
when 36 then
|
||||
ConstantReadNode.new(start_offset, length)
|
||||
ConstantReadNode.new(location)
|
||||
when 37 then
|
||||
load_serialized_length
|
||||
DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, start_offset, length)
|
||||
DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, location)
|
||||
when 38 then
|
||||
DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, start_offset, length)
|
||||
DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, location)
|
||||
when 39 then
|
||||
ElseNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
|
||||
ElseNode.new(load_location, load_optional_node, load_optional_location, location)
|
||||
when 40 then
|
||||
EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, start_offset, length)
|
||||
EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, location)
|
||||
when 41 then
|
||||
EmbeddedVariableNode.new(load_location, load_node, start_offset, length)
|
||||
EmbeddedVariableNode.new(load_location, load_node, location)
|
||||
when 42 then
|
||||
EnsureNode.new(load_location, load_optional_node, load_location, start_offset, length)
|
||||
EnsureNode.new(load_location, load_optional_node, load_location, location)
|
||||
when 43 then
|
||||
FalseNode.new(start_offset, length)
|
||||
FalseNode.new(location)
|
||||
when 44 then
|
||||
FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, start_offset, length)
|
||||
FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, location)
|
||||
when 45 then
|
||||
FloatNode.new(start_offset, length)
|
||||
FloatNode.new(location)
|
||||
when 46 then
|
||||
ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, start_offset, length)
|
||||
ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, location)
|
||||
when 47 then
|
||||
ForwardingArgumentsNode.new(start_offset, length)
|
||||
ForwardingArgumentsNode.new(location)
|
||||
when 48 then
|
||||
ForwardingParameterNode.new(start_offset, length)
|
||||
ForwardingParameterNode.new(location)
|
||||
when 49 then
|
||||
ForwardingSuperNode.new(load_optional_node, start_offset, length)
|
||||
ForwardingSuperNode.new(load_optional_node, location)
|
||||
when 50 then
|
||||
GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
|
||||
GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
|
||||
when 51 then
|
||||
GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
|
||||
GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
|
||||
when 52 then
|
||||
GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
|
||||
GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
|
||||
when 53 then
|
||||
GlobalVariableReadNode.new(start_offset, length)
|
||||
GlobalVariableReadNode.new(location)
|
||||
when 54 then
|
||||
GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, start_offset, length)
|
||||
GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, location)
|
||||
when 55 then
|
||||
HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
|
||||
HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
|
||||
when 56 then
|
||||
HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
|
||||
HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, location)
|
||||
when 57 then
|
||||
IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
|
||||
IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
|
||||
when 58 then
|
||||
ImaginaryNode.new(load_node, start_offset, length)
|
||||
ImaginaryNode.new(load_node, location)
|
||||
when 59 then
|
||||
InNode.new(load_node, load_optional_node, load_location, load_optional_location, start_offset, length)
|
||||
InNode.new(load_node, load_optional_node, load_location, load_optional_location, location)
|
||||
when 60 then
|
||||
InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
|
||||
InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
|
||||
when 61 then
|
||||
InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
|
||||
InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
|
||||
when 62 then
|
||||
InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
|
||||
InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
|
||||
when 63 then
|
||||
InstanceVariableReadNode.new(start_offset, length)
|
||||
InstanceVariableReadNode.new(location)
|
||||
when 64 then
|
||||
InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
|
||||
InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
|
||||
when 65 then
|
||||
IntegerNode.new(start_offset, length)
|
||||
IntegerNode.new(location)
|
||||
when 66 then
|
||||
InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, start_offset, length)
|
||||
InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, location)
|
||||
when 67 then
|
||||
InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
|
||||
InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
|
||||
when 68 then
|
||||
InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
|
||||
InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
|
||||
when 69 then
|
||||
InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
|
||||
InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
|
||||
when 70 then
|
||||
KeywordHashNode.new(Array.new(load_varint) { load_node }, start_offset, length)
|
||||
KeywordHashNode.new(Array.new(load_varint) { load_node }, location)
|
||||
when 71 then
|
||||
KeywordParameterNode.new(load_location, load_optional_node, start_offset, length)
|
||||
KeywordParameterNode.new(load_location, load_optional_node, location)
|
||||
when 72 then
|
||||
KeywordRestParameterNode.new(load_location, load_optional_location, start_offset, length)
|
||||
KeywordRestParameterNode.new(load_location, load_optional_location, location)
|
||||
when 73 then
|
||||
LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, start_offset, length)
|
||||
LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, location)
|
||||
when 74 then
|
||||
LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
|
||||
LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, location)
|
||||
when 75 then
|
||||
LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
|
||||
LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, location)
|
||||
when 76 then
|
||||
LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, start_offset, length)
|
||||
LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, location)
|
||||
when 77 then
|
||||
LocalVariableReadNode.new(load_constant, load_varint, start_offset, length)
|
||||
LocalVariableReadNode.new(load_constant, load_varint, location)
|
||||
when 78 then
|
||||
LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, start_offset, length)
|
||||
LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, location)
|
||||
when 79 then
|
||||
MatchPredicateNode.new(load_node, load_node, load_location, start_offset, length)
|
||||
MatchPredicateNode.new(load_node, load_node, load_location, location)
|
||||
when 80 then
|
||||
MatchRequiredNode.new(load_node, load_node, load_location, start_offset, length)
|
||||
MatchRequiredNode.new(load_node, load_node, load_location, location)
|
||||
when 81 then
|
||||
MissingNode.new(start_offset, length)
|
||||
MissingNode.new(location)
|
||||
when 82 then
|
||||
ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, start_offset, length)
|
||||
ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, location)
|
||||
when 83 then
|
||||
MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
|
||||
MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, location)
|
||||
when 84 then
|
||||
NextNode.new(load_optional_node, load_location, start_offset, length)
|
||||
NextNode.new(load_optional_node, load_location, location)
|
||||
when 85 then
|
||||
NilNode.new(start_offset, length)
|
||||
NilNode.new(location)
|
||||
when 86 then
|
||||
NoKeywordsParameterNode.new(load_location, load_location, start_offset, length)
|
||||
NoKeywordsParameterNode.new(load_location, load_location, location)
|
||||
when 87 then
|
||||
NumberedReferenceReadNode.new(start_offset, length)
|
||||
NumberedReferenceReadNode.new(location)
|
||||
when 88 then
|
||||
OptionalParameterNode.new(load_constant, load_location, load_location, load_node, start_offset, length)
|
||||
OptionalParameterNode.new(load_constant, load_location, load_location, load_node, location)
|
||||
when 89 then
|
||||
OrNode.new(load_node, load_node, load_location, start_offset, length)
|
||||
OrNode.new(load_node, load_node, load_location, location)
|
||||
when 90 then
|
||||
ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, start_offset, length)
|
||||
ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, location)
|
||||
when 91 then
|
||||
ParenthesesNode.new(load_optional_node, load_location, load_location, start_offset, length)
|
||||
ParenthesesNode.new(load_optional_node, load_location, load_location, location)
|
||||
when 92 then
|
||||
PinnedExpressionNode.new(load_node, load_location, load_location, load_location, start_offset, length)
|
||||
PinnedExpressionNode.new(load_node, load_location, load_location, load_location, location)
|
||||
when 93 then
|
||||
PinnedVariableNode.new(load_node, load_location, start_offset, length)
|
||||
PinnedVariableNode.new(load_node, load_location, location)
|
||||
when 94 then
|
||||
PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
|
||||
PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
|
||||
when 95 then
|
||||
PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
|
||||
PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
|
||||
when 96 then
|
||||
ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, start_offset, length)
|
||||
ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, location)
|
||||
when 97 then
|
||||
RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, start_offset, length)
|
||||
RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, location)
|
||||
when 98 then
|
||||
RationalNode.new(load_node, start_offset, length)
|
||||
RationalNode.new(load_node, location)
|
||||
when 99 then
|
||||
RedoNode.new(start_offset, length)
|
||||
RedoNode.new(location)
|
||||
when 100 then
|
||||
RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, start_offset, length)
|
||||
RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, location)
|
||||
when 101 then
|
||||
RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, start_offset, length)
|
||||
RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, location)
|
||||
when 102 then
|
||||
RequiredParameterNode.new(load_constant, start_offset, length)
|
||||
RequiredParameterNode.new(load_constant, location)
|
||||
when 103 then
|
||||
RescueModifierNode.new(load_node, load_location, load_node, start_offset, length)
|
||||
RescueModifierNode.new(load_node, load_location, load_node, location)
|
||||
when 104 then
|
||||
RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, start_offset, length)
|
||||
RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, location)
|
||||
when 105 then
|
||||
RestParameterNode.new(load_location, load_optional_location, start_offset, length)
|
||||
RestParameterNode.new(load_location, load_optional_location, location)
|
||||
when 106 then
|
||||
RetryNode.new(start_offset, length)
|
||||
RetryNode.new(location)
|
||||
when 107 then
|
||||
ReturnNode.new(load_location, load_optional_node, start_offset, length)
|
||||
ReturnNode.new(load_location, load_optional_node, location)
|
||||
when 108 then
|
||||
SelfNode.new(start_offset, length)
|
||||
SelfNode.new(location)
|
||||
when 109 then
|
||||
SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, start_offset, length)
|
||||
SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, location)
|
||||
when 110 then
|
||||
SourceEncodingNode.new(start_offset, length)
|
||||
SourceEncodingNode.new(location)
|
||||
when 111 then
|
||||
SourceFileNode.new(load_string, start_offset, length)
|
||||
SourceFileNode.new(load_string, location)
|
||||
when 112 then
|
||||
SourceLineNode.new(start_offset, length)
|
||||
SourceLineNode.new(location)
|
||||
when 113 then
|
||||
SplatNode.new(load_location, load_optional_node, start_offset, length)
|
||||
SplatNode.new(load_location, load_optional_node, location)
|
||||
when 114 then
|
||||
StatementsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
|
||||
StatementsNode.new(Array.new(load_varint) { load_node }, location)
|
||||
when 115 then
|
||||
StringConcatNode.new(load_node, load_node, start_offset, length)
|
||||
StringConcatNode.new(load_node, load_node, location)
|
||||
when 116 then
|
||||
StringNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
|
||||
StringNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
|
||||
when 117 then
|
||||
SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, start_offset, length)
|
||||
SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, location)
|
||||
when 118 then
|
||||
SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
|
||||
SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
|
||||
when 119 then
|
||||
TrueNode.new(start_offset, length)
|
||||
TrueNode.new(location)
|
||||
when 120 then
|
||||
UndefNode.new(Array.new(load_varint) { load_node }, load_location, start_offset, length)
|
||||
UndefNode.new(Array.new(load_varint) { load_node }, load_location, location)
|
||||
when 121 then
|
||||
UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
|
||||
UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
|
||||
when 122 then
|
||||
UntilNode.new(load_location, load_node, load_optional_node, start_offset, length)
|
||||
UntilNode.new(load_location, load_node, load_optional_node, location)
|
||||
when 123 then
|
||||
WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, start_offset, length)
|
||||
WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, location)
|
||||
when 124 then
|
||||
WhileNode.new(load_location, load_node, load_optional_node, start_offset, length)
|
||||
WhileNode.new(load_location, load_node, load_optional_node, location)
|
||||
when 125 then
|
||||
XStringNode.new(load_location, load_location, load_location, load_string, start_offset, length)
|
||||
XStringNode.new(load_location, load_location, load_location, load_string, location)
|
||||
when 126 then
|
||||
YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, start_offset, length)
|
||||
YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, location)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -1,212 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require "yarp_test_helper"
|
||||
|
||||
class CompileTest < Test::Unit::TestCase
|
||||
def test_AliasNode
|
||||
assert_compiles("alias foo bar")
|
||||
end
|
||||
|
||||
def test_AndNode
|
||||
assert_compiles("true && false")
|
||||
end
|
||||
|
||||
def test_ArrayNode
|
||||
assert_compiles("[]")
|
||||
assert_compiles("[foo, bar, baz]")
|
||||
end
|
||||
|
||||
def test_AssocNode
|
||||
assert_compiles("{ foo: bar }")
|
||||
end
|
||||
|
||||
def test_BlockNode
|
||||
assert_compiles("foo { bar }")
|
||||
end
|
||||
|
||||
def test_BlockNode_with_optionals
|
||||
assert_compiles("foo { |x = 1| bar }")
|
||||
end
|
||||
|
||||
def test_CallNode
|
||||
assert_compiles("foo")
|
||||
assert_compiles("foo(bar)")
|
||||
end
|
||||
|
||||
def test_ClassVariableReadNode
|
||||
assert_compiles("@@foo")
|
||||
end
|
||||
|
||||
def test_ClassVariableWriteNode
|
||||
assert_compiles("@@foo = 1")
|
||||
end
|
||||
|
||||
def test_FalseNode
|
||||
assert_compiles("false")
|
||||
end
|
||||
|
||||
def test_GlobalVariableReadNode
|
||||
assert_compiles("$foo")
|
||||
end
|
||||
|
||||
def test_GlobalVariableWriteNode
|
||||
assert_compiles("$foo = 1")
|
||||
end
|
||||
|
||||
def test_HashNode
|
||||
assert_compiles("{ foo: bar }")
|
||||
end
|
||||
|
||||
def test_InstanceVariableReadNode
|
||||
assert_compiles("@foo")
|
||||
end
|
||||
|
||||
def test_InstanceVariableWriteNode
|
||||
assert_compiles("@foo = 1")
|
||||
end
|
||||
|
||||
def test_IntegerNode
|
||||
assert_compiles("1")
|
||||
assert_compiles("1_000")
|
||||
end
|
||||
|
||||
def test_InterpolatedStringNode
|
||||
assert_compiles("\"foo \#{bar} baz\"")
|
||||
end
|
||||
|
||||
def test_LocalVariableWriteNode
|
||||
assert_compiles("foo = 1")
|
||||
end
|
||||
|
||||
def test_LocalVariableReadNode
|
||||
assert_compiles("[foo = 1, foo]")
|
||||
end
|
||||
|
||||
def test_NilNode
|
||||
assert_compiles("nil")
|
||||
end
|
||||
|
||||
def test_OrNode
|
||||
assert_compiles("true || false")
|
||||
end
|
||||
|
||||
def test_ParenthesesNode
|
||||
assert_compiles("()")
|
||||
end
|
||||
|
||||
def test_ProgramNode
|
||||
assert_compiles("")
|
||||
end
|
||||
|
||||
def test_RangeNode
|
||||
assert_compiles("foo..bar")
|
||||
assert_compiles("foo...bar")
|
||||
assert_compiles("(foo..)")
|
||||
assert_compiles("(foo...)")
|
||||
assert_compiles("(..bar)")
|
||||
assert_compiles("(...bar)")
|
||||
end
|
||||
|
||||
def test_SelfNode
|
||||
assert_compiles("self")
|
||||
end
|
||||
|
||||
def test_StringNode
|
||||
assert_compiles("\"foo\"")
|
||||
end
|
||||
|
||||
def test_SymbolNode
|
||||
assert_compiles(":foo")
|
||||
end
|
||||
|
||||
def test_TrueNode
|
||||
assert_compiles("true")
|
||||
end
|
||||
|
||||
def test_UndefNode
|
||||
assert_compiles("undef :foo, :bar, :baz")
|
||||
end
|
||||
|
||||
def test_XStringNode
|
||||
assert_compiles("`foo`")
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Asserts that YARP.compile(source) yields an instruction sequence that
# assert_equal_iseqs considers equivalent to the one RubyVM produces for the
# same source (see rubyvm_compile for the compile options used).
def assert_compiles(source)
|
||||
assert_equal_iseqs(rubyvm_compile(source), YARP.compile(source))
|
||||
end
|
||||
|
||||
# Instruction sequences have 13 elements in their lists. We don't currently
|
||||
# support all of the fields, so we can't compare the iseqs directly. Instead,
|
||||
# we compare the elements that we do support.
|
||||
def assert_equal_iseqs(expected, actual)
|
||||
# The first element is the magic comment string.
|
||||
assert_equal expected[0], actual[0]
|
||||
|
||||
# The next three elements are the major, minor, and patch version numbers.
|
||||
# TODO: Insert this check once Ruby 3.3 is released, and the TruffleRuby
|
||||
# GitHub workflow also checks against Ruby 3.3
|
||||
# assert_equal expected[1...4], actual[1...4]
|
||||
|
||||
# The next element is a set of options for the iseq. It has lots of
|
||||
# different information, some of which we support and some of which we
|
||||
# don't.
|
||||
assert_equal expected[4][:arg_size], actual[4][:arg_size], "Unexpected difference in arg_size"
|
||||
assert_equal expected[4][:stack_max], actual[4][:stack_max], "Unexpected difference in stack_max"
|
||||
|
||||
assert_kind_of Integer, actual[4][:local_size]
|
||||
assert_kind_of Integer, actual[4][:node_id]
|
||||
|
||||
assert_equal expected[4][:code_location].length, actual[4][:code_location].length, "Unexpected difference in code_location length"
|
||||
assert_equal expected[4][:node_ids].length, actual[4][:node_ids].length, "Unexpected difference in node_ids length"
|
||||
|
||||
# Then we have the name of the iseq, the relative file path, the absolute
|
||||
# file path, and the line number. We don't have this working quite yet.
|
||||
assert_kind_of String, actual[5]
|
||||
assert_kind_of String, actual[6]
|
||||
assert_kind_of String, actual[7]
|
||||
assert_kind_of Integer, actual[8]
|
||||
|
||||
# Next we have the type of the iseq.
|
||||
assert_equal expected[9], actual[9]
|
||||
|
||||
# Next we have the list of local variables. We don't support this yet.
|
||||
assert_kind_of Array, actual[10]
|
||||
|
||||
# Next we have the argument options. These are used in block and method
|
||||
# iseqs to reflect how the arguments are passed.
|
||||
assert_equal expected[11], actual[11], "Unexpected difference in argument options"
|
||||
|
||||
# Next we have the catch table entries. We don't have this working yet.
|
||||
assert_kind_of Array, actual[12]
|
||||
|
||||
# Finally we have the actual instructions. We support some of this, but omit
|
||||
# line numbers and some tracepoint events.
|
||||
expected[13].each do |insn|
|
||||
case insn
|
||||
in [:send, opnds, expected_block] unless expected_block.nil?
|
||||
actual[13].shift => [:send, ^(opnds), actual_block]
|
||||
assert_equal_iseqs expected_block, actual_block
|
||||
in Array | :RUBY_EVENT_B_CALL | :RUBY_EVENT_B_RETURN | /^label_\d+/
|
||||
assert_equal insn, actual[13].shift
|
||||
in Integer | /^RUBY_EVENT_/
|
||||
# skip these for now
|
||||
else
|
||||
flunk "Unexpected instruction: #{insn.inspect}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Compiles +source+ with RubyVM::InstructionSequence and returns the result in
# its array form (to_a). Peephole optimization, specialized instructions,
# operand/instruction unification and frozen string literals are all switched
# off.
# NOTE(review): presumably the optimizations are disabled so the reference
# output stays comparable to what YARP.compile emits -- confirm.
def rubyvm_compile(source)
|
||||
options = {
|
||||
peephole_optimization: false,
|
||||
specialized_instruction: false,
|
||||
operands_unification: false,
|
||||
instructions_unification: false,
|
||||
frozen_string_literal: false
|
||||
}
|
||||
|
||||
RubyVM::InstructionSequence.compile(source, **options).to_a
|
||||
end
|
||||
end
|
@ -55,6 +55,21 @@ class EncodingTest < Test::Unit::TestCase
|
||||
assert_equal Encoding.find("utf-8"), actual
|
||||
end
|
||||
|
||||
# This test may be a little confusing. Basically when we use our strpbrk, it
|
||||
# takes into account the encoding of the file.
|
||||
def test_strpbrk_multibyte
|
||||
result = YARP.parse(<<~RUBY)
|
||||
# encoding: Shift_JIS
|
||||
%w[\x81\x5c]
|
||||
RUBY
|
||||
|
||||
assert(result.errors.empty?)
|
||||
assert_equal(
|
||||
(+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
|
||||
result.value.statements.body.first.elements.first.unescaped
|
||||
)
|
||||
end
|
||||
|
||||
def test_utf_8_variations
|
||||
%w[
|
||||
utf-8-unix
|
||||
|
@ -18,3 +18,16 @@ not foo and
|
||||
|
||||
|
||||
bar
|
||||
|
||||
not(foo
|
||||
|
||||
|
||||
)
|
||||
|
||||
not(
|
||||
|
||||
|
||||
foo
|
||||
|
||||
|
||||
)
|
||||
|
@ -76,6 +76,7 @@ foo => Foo(*bar, baz, *qux)
|
||||
foo => Foo[]
|
||||
foo => Foo[1]
|
||||
foo => Foo[1, 2, 3]
|
||||
foo => Foo[Foo[]]
|
||||
foo => Foo[bar]
|
||||
foo => Foo[*bar, baz]
|
||||
foo => Foo[bar, *baz]
|
||||
|
@ -1,363 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require_relative "yarp_test_helper"
|
||||
require "yarp/language_server"
|
||||
|
||||
module YARP
|
||||
class LanguageServerTest < Test::Unit::TestCase
|
||||
# Lightweight structural matchers used to compare the server's responses
# against an expected "shape". Request[] converts nested arrays and hashes
# into Tuple and Shape matchers; every other value is kept as-is and is
# compared against the candidate with ===.
module Request
  # Represents a hash pattern. Every key listed in the pattern has to match
  # the corresponding entry of the candidate; keys that only exist in the
  # candidate are ignored. The special value :any only requires the key to
  # be present.
  class Shape
    attr_reader :values

    def initialize(values)
      @values = values
    end

    # Returns true when every pattern entry matches +other+. A candidate
    # that cannot be looked up (for example nil, when a response or its
    # result is missing) does not match instead of raising NoMethodError,
    # so a mismatch surfaces as a failed assertion rather than an error.
    def ===(other)
      values.all? do |key, value|
        if value == :any
          other.respond_to?(:key?) && other.key?(key)
        else
          other.respond_to?(:[]) && value === other[key]
        end
      end
    end
  end

  # Represents an array pattern. Each pattern element is matched against the
  # candidate element at the same index; candidate elements beyond the
  # pattern's length are ignored.
  class Tuple
    attr_reader :values

    def initialize(values)
      @values = values
    end

    # Returns true when every pattern element matches the element of
    # +other+ at the same index. A candidate that does not support indexing
    # (for example nil) does not match instead of raising NoMethodError.
    def ===(other)
      values.each_with_index.all? do |value, index|
        other.respond_to?(:[]) && value === other[index]
      end
    end
  end

  # Recursively builds a matcher: arrays become Tuples, hashes become Shapes
  # and anything else is returned unchanged.
  def self.[](value)
    case value
    when Array
      Tuple.new(value.map { |child| self[child] })
    when Hash
      Shape.new(value.transform_values { |child| self[child] })
    else
      value
    end
  end
end
|
||||
|
||||
# An "initialize" request carrying the given request id. Built with the block
# form of Struct.new so that no anonymous intermediate superclass is created.
Initialize = Struct.new(:id) do
  # The message as a hash, without the jsonrpc version entry.
  def to_hash
    { method: "initialize", id: id }
  end
end
|
||||
|
||||
# A "shutdown" request carrying the given request id. Built with the block
# form of Struct.new so that no anonymous intermediate superclass is created.
Shutdown = Struct.new(:id) do
  # The message as a hash, without the jsonrpc version entry.
  def to_hash
    { method: "shutdown", id: id }
  end
end
|
||||
|
||||
# A "textDocument/didOpen" notification for the document at +uri+ with the
# given +text+. Built with the block form of Struct.new so that no anonymous
# intermediate superclass is created.
TextDocumentDidOpen = Struct.new(:uri, :text) do
  # The message as a hash, without the jsonrpc version entry.
  def to_hash
    {
      method: "textDocument/didOpen",
      params: { textDocument: { uri: uri, text: text } }
    }
  end
end
|
||||
|
||||
# A "textDocument/didChange" notification that replaces the content of the
# document at +uri+ with +text+ (sent as a single content change). Built with
# the block form of Struct.new so that no anonymous intermediate superclass is
# created.
TextDocumentDidChange = Struct.new(:uri, :text) do
  # The message as a hash, without the jsonrpc version entry.
  def to_hash
    {
      method: "textDocument/didChange",
      params: {
        textDocument: { uri: uri },
        contentChanges: [{ text: text }]
      }
    }
  end
end
|
||||
|
||||
# A "textDocument/didClose" notification for the document at +uri+. Built with
# the block form of Struct.new so that no anonymous intermediate superclass is
# created.
TextDocumentDidClose = Struct.new(:uri) do
  # The message as a hash, without the jsonrpc version entry.
  def to_hash
    {
      method: "textDocument/didClose",
      params: { textDocument: { uri: uri } }
    }
  end
end
|
||||
|
||||
# A "textDocument/codeAction" request with the given request id for the
# document at +uri+, passing +diagnostics+ as the request context. Built with
# the block form of Struct.new so that no anonymous intermediate superclass is
# created.
TextDocumentCodeAction = Struct.new(:id, :uri, :diagnostics) do
  # The message as a hash, without the jsonrpc version entry.
  def to_hash
    {
      method: "textDocument/codeAction",
      id: id,
      params: {
        textDocument: { uri: uri },
        context: {
          diagnostics: diagnostics
        }
      }
    }
  end
end
|
||||
|
||||
# A "textDocument/diagnostic" request with the given request id for the
# document at +uri+. Built with the block form of Struct.new so that no
# anonymous intermediate superclass is created.
TextDocumentDiagnostic = Struct.new(:id, :uri) do
  # The message as a hash, without the jsonrpc version entry.
  def to_hash
    {
      method: "textDocument/diagnostic",
      id: id,
      params: {
        textDocument: { uri: uri }
      }
    }
  end
end
|
||||
|
||||
# Runs an initialize/shutdown exchange while a temporary Ruby file exists on
# disk and checks that both requests are answered.
# NOTE(review): the temp file is written and rewound, but neither its path nor
# its content is ever sent to the server, so the messages exchanged here only
# differ from test_clean_shutdown in the shutdown id -- confirm whether a
# didOpen/diagnostic request for the file was intended.
def test_reading_file
|
||||
Tempfile.create(%w[test- .rb]) do |file|
|
||||
file.write("class Foo; end")
|
||||
file.rewind
|
||||
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
end
|
||||
end
|
||||
|
||||
def test_clean_shutdown
|
||||
responses = run_server([Initialize.new(1), Shutdown.new(2)])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 2, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
end
|
||||
|
||||
# Runs an initialize/shutdown exchange and checks that both requests are
# answered.
# NOTE(review): despite the name, no file URI (existing or not) is sent to the
# server in this test -- confirm whether a request for a missing file was
# intended.
def test_file_that_does_not_exist
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
end
|
||||
|
||||
def test_code_action_request
|
||||
message = "this is an error"
|
||||
diagnostic = {
|
||||
range: { start: { line: 0, character: 0 }, end: { line: 0, character: 0 } },
|
||||
message: message,
|
||||
severity: 1,
|
||||
}
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
|
||||
1 + (
|
||||
RUBY
|
||||
TextDocumentCodeAction.new(2, "file:///path/to/file.rb", [diagnostic]),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 2, result: [
|
||||
{
|
||||
title: "Report incorrect error: `#{message}`",
|
||||
kind: "quickfix",
|
||||
diagnostics: [diagnostic],
|
||||
command: {
|
||||
title: "Report incorrect error",
|
||||
command: "vscode.open",
|
||||
arguments: [String]
|
||||
}
|
||||
}
|
||||
],
|
||||
},
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
assert(responses.dig(1, :result, 0, :command, :arguments, 0).include?(URI.encode_www_form_component(message)))
|
||||
end
|
||||
|
||||
def test_code_action_request_no_diagnostic
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
|
||||
1 + (
|
||||
RUBY
|
||||
TextDocumentCodeAction.new(2, "file:///path/to/file.rb", []),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 2, result: [] },
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
end
|
||||
|
||||
def test_code_action_request_no_content
|
||||
message = "this is an error"
|
||||
diagnostic = {
|
||||
range: { start: { line: 0, character: 0 }, end: { line: 0, character: 0 } },
|
||||
message: message,
|
||||
severity: 1,
|
||||
}
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
TextDocumentCodeAction.new(2, "file:///path/to/file.rb", [diagnostic]),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 2, result: nil },
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
end
|
||||
|
||||
def test_diagnostics_request_error
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
|
||||
1 + (
|
||||
RUBY
|
||||
TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 2, result: { kind: "full", items: [
|
||||
{
|
||||
range: {
|
||||
start: { line: Integer, character: Integer },
|
||||
end: { line: Integer, character: Integer }
|
||||
},
|
||||
message: String,
|
||||
severity: Integer
|
||||
},
|
||||
] } },
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
assert(responses.dig(1, :result, :items).count { |item| item[:severity] == 1 } > 0)
|
||||
end
|
||||
|
||||
def test_diagnostics_request_warning
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
|
||||
a/b /c
|
||||
RUBY
|
||||
TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 2, result: { kind: "full", items: [
|
||||
{
|
||||
range: {
|
||||
start: { line: Integer, character: Integer },
|
||||
end: { line: Integer, character: Integer }
|
||||
},
|
||||
message: String,
|
||||
severity: Integer
|
||||
},
|
||||
] } },
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
assert(responses.dig(1, :result, :items).count { |item| item[:severity] == 2 } > 0)
|
||||
end
|
||||
|
||||
def test_diagnostics_request_nothing
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
|
||||
a = 1
|
||||
RUBY
|
||||
TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 2, result: { kind: "full", items: [] } },
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
assert_equal(0, responses.dig(1, :result, :items).size)
|
||||
end
|
||||
|
||||
def test_diagnostics_request_no_content
|
||||
responses = run_server([
|
||||
Initialize.new(1),
|
||||
TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
|
||||
Shutdown.new(3)
|
||||
])
|
||||
|
||||
shape = Request[[
|
||||
{ id: 1, result: { capabilities: Hash } },
|
||||
{ id: 2, result: nil },
|
||||
{ id: 3, result: {} }
|
||||
]]
|
||||
|
||||
assert_operator(shape, :===, responses)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Serializes a request object into a framed message string: the hash returned
# by content.to_hash gets a jsonrpc: "2.0" entry, is encoded as JSON, and is
# prefixed with a Content-Length header holding the JSON's size in bytes,
# followed by a blank line.
def write(content)
|
||||
request = content.to_hash.merge(jsonrpc: "2.0").to_json
|
||||
"Content-Length: #{request.bytesize}\r\n\r\n#{request}"
|
||||
end
|
||||
|
||||
# Parses every framed message out of the given IO and returns them in order as
# an array of hashes with symbol keys. For each message it reads the header
# section up to the terminating "\r\n\r\n", takes the Content-Length value
# from it (matched case-insensitively), reads that many bytes and parses them
# as JSON. Stops when no further header section can be read.
def read(content)
|
||||
[].tap do |messages|
|
||||
while (headers = content.gets("\r\n\r\n"))
|
||||
source = content.read(headers[/Content-Length: (\d+)/i, 1].to_i)
|
||||
messages << JSON.parse(source, symbolize_names: true)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Runs a LanguageServer over in-memory streams: all +messages+ are framed with
# #write and concatenated into the input StringIO, the server is run, and the
# output StringIO is rewound and parsed with #read. Returns the parsed
# responses.
def run_server(messages)
|
||||
input = StringIO.new(messages.map { |message| write(message) }.join)
|
||||
output = StringIO.new
|
||||
|
||||
LanguageServer.new(
|
||||
input: input,
|
||||
output: output,
|
||||
).run
|
||||
|
||||
read(output.tap(&:rewind))
|
||||
end
|
||||
end
|
||||
end
|
@ -3,10 +3,12 @@
|
||||
require "yarp_test_helper"
|
||||
|
||||
class ParseTest < Test::Unit::TestCase
|
||||
# Because we're reading the snapshots from disk, we need to make sure that
|
||||
# they're encoded as UTF-8. When certain settings are present this might not
|
||||
# always be the case (e.g., LANG=C or -Eascii-8bit). So here we force the
|
||||
# default external encoding for the duration of the test.
|
||||
# When we pretty-print the trees to compare against the snapshots, we want to
|
||||
# be certain that we print with the same external encoding. This is because
|
||||
# methods like Symbol#inspect take into account external encoding and it could
|
||||
# change how the snapshot is generated. On machines with certain settings
|
||||
# (like LANG=C or -Eascii-8bit) this could have been changed. So here we're
|
||||
# going to force it to be UTF-8 to keep the snapshots consistent.
|
||||
def setup
|
||||
@previous_default_external = Encoding.default_external
|
||||
ignore_warnings { Encoding.default_external = Encoding::UTF_8 }
|
||||
@ -29,20 +31,6 @@ class ParseTest < Test::Unit::TestCase
|
||||
seattlerb/pct_w_heredoc_interp_nested.txt
|
||||
]
|
||||
|
||||
# Because the filepath in SourceFileNodes is different from one machine to the
|
||||
# next, PP.pp(sexp, +"", 79) can have different results: both the path itself
|
||||
# and the line breaks based on the length of the path.
|
||||
def normalize_printed(printed)
|
||||
printed
|
||||
.gsub(
|
||||
/SourceFileNode \s*
|
||||
\(\s* (\d+\.\.\.\d+) \s*\) \s*
|
||||
\(\s* ("[^"]*") \s*\)
|
||||
/mx,
|
||||
'SourceFileNode(\1)(\2)')
|
||||
.gsub(__dir__, "")
|
||||
end
|
||||
|
||||
def find_source_file_node(node)
|
||||
if node.is_a?(YARP::SourceFileNode)
|
||||
node
|
||||
@ -79,27 +67,26 @@ class ParseTest < Test::Unit::TestCase
|
||||
# that is invalid Ruby.
|
||||
refute_nil Ripper.sexp_raw(source)
|
||||
|
||||
# Next, parse the source and print the value.
|
||||
result = YARP.parse_file(filepath)
|
||||
value = result.value
|
||||
printed = normalize_printed(PP.pp(value, +"", 79))
|
||||
|
||||
# Next, assert that there were no errors during parsing.
|
||||
assert_empty result.errors, value
|
||||
result = YARP.parse(source, relative)
|
||||
assert_empty result.errors
|
||||
|
||||
# Next, pretty print the source.
|
||||
printed = PP.pp(result.value, +"", 79)
|
||||
|
||||
if File.exist?(snapshot)
|
||||
normalized = normalize_printed(File.read(snapshot))
|
||||
saved = File.read(snapshot)
|
||||
|
||||
# If the snapshot file exists, but the printed value does not match the
|
||||
# snapshot, then update the snapshot file.
|
||||
if normalized != printed
|
||||
File.write(snapshot, normalized)
|
||||
if printed != saved
|
||||
File.write(snapshot, printed)
|
||||
warn("Updated snapshot at #{snapshot}.")
|
||||
end
|
||||
|
||||
# If the snapshot file exists, then assert that the printed value
|
||||
# matches the snapshot.
|
||||
assert_equal(normalized, printed)
|
||||
assert_equal(saved, printed)
|
||||
else
|
||||
# If the snapshot file does not yet exist, then write it out now.
|
||||
File.write(snapshot, printed)
|
||||
@ -108,11 +95,11 @@ class ParseTest < Test::Unit::TestCase
|
||||
|
||||
# Next, assert that the value can be serialized and deserialized without
|
||||
# changing the shape of the tree.
|
||||
assert_equal_nodes(value, YARP.load(source, YARP.dump(source, filepath)))
|
||||
assert_equal_nodes(result.value, YARP.load(source, YARP.dump(source, relative)))
|
||||
|
||||
# Next, assert that the newlines are in the expected places.
|
||||
expected_newlines = [0]
|
||||
source.b.scan("\n") { expected_newlines << $~.offset(0)[0] }
|
||||
source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
|
||||
assert_equal expected_newlines, YARP.newlines(source)
|
||||
|
||||
# Finally, assert that we can lex the source and get the same tokens as
|
||||
|
@ -101,6 +101,10 @@ class RegexpTest < Test::Unit::TestCase
|
||||
refute_nil(YARP.named_captures("(?#foo)"))
|
||||
end
|
||||
|
||||
def test_comments_with_escaped_parentheses
|
||||
refute_nil(YARP.named_captures("(?#foo\\)\\))"))
|
||||
end
|
||||
|
||||
def test_non_capturing_groups
|
||||
refute_nil(YARP.named_captures("(?:foo)"))
|
||||
end
|
||||
|
@ -102,7 +102,7 @@ ProgramNode(0...185)(
|
||||
StringNode(123...129)((123...125), (125...128), (128...129), "abc"),
|
||||
DefNode(131...149)(
|
||||
(144...145),
|
||||
SourceFileNode(135...143)("/fixtures/keyword_method_names.txt"),
|
||||
SourceFileNode(135...143)("keyword_method_names.txt"),
|
||||
nil,
|
||||
nil,
|
||||
[],
|
||||
|
@ -5,7 +5,7 @@ ProgramNode(0...51)(
|
||||
RetryNode(6...11)(),
|
||||
SelfNode(13...17)(),
|
||||
SourceEncodingNode(19...31)(),
|
||||
SourceFileNode(33...41)("/fixtures/keywords.txt"),
|
||||
SourceFileNode(33...41)("keywords.txt"),
|
||||
SourceLineNode(43...51)()]
|
||||
)
|
||||
)
|
||||
|
@ -1,6 +1,6 @@
|
||||
ProgramNode(0...125)(
|
||||
ProgramNode(0...156)(
|
||||
[],
|
||||
StatementsNode(0...125)(
|
||||
StatementsNode(0...156)(
|
||||
[AndNode(0...19)(
|
||||
CallNode(0...7)(
|
||||
CallNode(4...7)(nil, nil, (4...7), nil, nil, nil, nil, 0, "foo"),
|
||||
@ -146,6 +146,48 @@ ProgramNode(0...125)(
|
||||
"!"
|
||||
),
|
||||
(108...111)
|
||||
),
|
||||
CallNode(127...138)(
|
||||
CallNode(131...134)(
|
||||
nil,
|
||||
nil,
|
||||
(131...134),
|
||||
nil,
|
||||
nil,
|
||||
nil,
|
||||
nil,
|
||||
0,
|
||||
"foo"
|
||||
),
|
||||
nil,
|
||||
(127...130),
|
||||
(130...131),
|
||||
nil,
|
||||
(137...138),
|
||||
nil,
|
||||
0,
|
||||
"!"
|
||||
),
|
||||
CallNode(140...156)(
|
||||
CallNode(147...150)(
|
||||
nil,
|
||||
nil,
|
||||
(147...150),
|
||||
nil,
|
||||
nil,
|
||||
nil,
|
||||
nil,
|
||||
0,
|
||||
"foo"
|
||||
),
|
||||
nil,
|
||||
(140...143),
|
||||
(143...144),
|
||||
nil,
|
||||
(155...156),
|
||||
nil,
|
||||
0,
|
||||
"!"
|
||||
)]
|
||||
)
|
||||
)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,7 +2,7 @@ ProgramNode(0...38)(
|
||||
[],
|
||||
StatementsNode(0...38)(
|
||||
[SourceEncodingNode(0...12)(),
|
||||
SourceFileNode(13...21)("/fixtures/unparser/corpus/literal/pragma.txt"),
|
||||
SourceFileNode(13...21)("unparser/corpus/literal/pragma.txt"),
|
||||
SourceLineNode(22...30)(),
|
||||
CallNode(31...38)(nil, nil, (31...38), nil, nil, nil, nil, 0, "__dir__")]
|
||||
)
|
||||
|
@ -3,7 +3,9 @@ ProgramNode(8...111)(
|
||||
StatementsNode(8...111)(
|
||||
[CaseNode(8...111)(
|
||||
ArrayNode(13...51)(
|
||||
[SourceFileNode(14...22)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"),
|
||||
[SourceFileNode(14...22)(
|
||||
"whitequark/pattern_matching__FILE__LINE_literals.txt"
|
||||
),
|
||||
CallNode(24...36)(
|
||||
SourceLineNode(24...32)(),
|
||||
nil,
|
||||
@ -22,7 +24,9 @@ ProgramNode(8...111)(
|
||||
[InNode(62...99)(
|
||||
ArrayPatternNode(65...99)(
|
||||
nil,
|
||||
[SourceFileNode(66...74)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"),
|
||||
[SourceFileNode(66...74)(
|
||||
"whitequark/pattern_matching__FILE__LINE_literals.txt"
|
||||
),
|
||||
SourceLineNode(76...84)(),
|
||||
SourceEncodingNode(86...98)()],
|
||||
nil,
|
||||
|
@ -1,6 +1,6 @@
|
||||
ProgramNode(0...8)(
|
||||
[],
|
||||
StatementsNode(0...8)(
|
||||
[SourceFileNode(0...8)("/fixtures/whitequark/string___FILE__.txt")]
|
||||
[SourceFileNode(0...8)("whitequark/string___FILE__.txt")]
|
||||
)
|
||||
)
|
||||
|
2241
yarp/api_node.c
2241
yarp/api_node.c
File diff suppressed because it is too large
Load Diff
@ -9,14 +9,13 @@
|
||||
#define YARP_AST_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
#include "yarp/util/yp_constant_pool.h"
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "yarp/util/yp_constant_pool.h"
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
// This enum represents every type of token in the Ruby source.
|
||||
typedef enum yp_token_type {
|
||||
YP_TOKEN_EOF = 1, // final token in the file
|
||||
|
826
yarp/compile.c
826
yarp/compile.c
@ -1,826 +0,0 @@
|
||||
#include "yarp/extension.h"
|
||||
|
||||
// The kinds of instruction sequence that this compiler can produce.
typedef enum {
    // The instruction sequence for the top level of a program.
    YP_ISEQ_TYPE_TOP = 0,

    // The instruction sequence for the body of a block.
    YP_ISEQ_TYPE_BLOCK = 1
} yp_iseq_type_t;
|
||||
|
||||
// The event markers that can be interleaved with the compiled instructions.
typedef enum {
    // Written into the instruction list as :RUBY_EVENT_B_CALL.
    YP_RUBY_EVENT_B_CALL = 0,

    // Written into the instruction list as :RUBY_EVENT_B_RETURN.
    YP_RUBY_EVENT_B_RETURN = 1
} yp_ruby_event_t;
|
||||
|
||||
typedef struct yp_iseq_compiler {
|
||||
// This is the parent compiler. It is used to communicate between ISEQs that
|
||||
// need to be able to jump back to the parent ISEQ.
|
||||
struct yp_iseq_compiler *parent;
|
||||
|
||||
// This is the list of local variables that are defined on this scope.
|
||||
yp_constant_id_list_t *locals;
|
||||
|
||||
// This is the instruction sequence that we are compiling. It's actually just
|
||||
// a Ruby array that maps to the output of RubyVM::InstructionSequence#to_a.
|
||||
VALUE insns;
|
||||
|
||||
// This is a list of IDs coming from the instructions that are being compiled.
|
||||
// In theory they should be deterministic, but we don't have that
|
||||
// functionality yet. Fortunately you can pass -1 for all of them and
|
||||
// everything for the most part continues to work.
|
||||
VALUE node_ids;
|
||||
|
||||
// This is the current size of the instruction sequence's stack.
|
||||
int stack_size;
|
||||
|
||||
// This is the maximum size of the instruction sequence's stack.
|
||||
int stack_max;
|
||||
|
||||
// This is the name of the instruction sequence.
|
||||
const char *name;
|
||||
|
||||
// This is the type of the instruction sequence.
|
||||
yp_iseq_type_t type;
|
||||
|
||||
// This is the optional argument information.
|
||||
VALUE optionals;
|
||||
|
||||
// This is the number of arguments.
|
||||
int arg_size;
|
||||
|
||||
// This is the current size of the instruction sequence's instructions and
|
||||
// operands.
|
||||
size_t size;
|
||||
|
||||
// This is the index of the current inline storage.
|
||||
size_t inline_storage_index;
|
||||
} yp_iseq_compiler_t;
|
||||
|
||||
static void
|
||||
yp_iseq_compiler_init(yp_iseq_compiler_t *compiler, yp_iseq_compiler_t *parent, yp_constant_id_list_t *locals, const char *name, yp_iseq_type_t type) {
|
||||
*compiler = (yp_iseq_compiler_t) {
|
||||
.parent = parent,
|
||||
.locals = locals,
|
||||
.insns = rb_ary_new(),
|
||||
.node_ids = rb_ary_new(),
|
||||
.stack_size = 0,
|
||||
.stack_max = 0,
|
||||
.name = name,
|
||||
.type = type,
|
||||
.optionals = rb_hash_new(),
|
||||
.arg_size = 0,
|
||||
.size = 0,
|
||||
.inline_storage_index = 0
|
||||
};
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Utilities */
|
||||
/******************************************************************************/
|
||||
|
||||
static inline int
|
||||
sizet2int(size_t value) {
|
||||
if (value > INT_MAX) rb_raise(rb_eRuntimeError, "value too large");
|
||||
return (int) value;
|
||||
}
|
||||
|
||||
static int
|
||||
local_index(yp_iseq_compiler_t *compiler, yp_constant_id_t constant_id, int depth) {
|
||||
int compiler_index;
|
||||
yp_iseq_compiler_t *local_compiler = compiler;
|
||||
|
||||
for (compiler_index = 0; compiler_index < depth; compiler_index++) {
|
||||
local_compiler = local_compiler->parent;
|
||||
assert(local_compiler != NULL);
|
||||
}
|
||||
|
||||
size_t index;
|
||||
for (index = 0; index < local_compiler->locals->size; index++) {
|
||||
if (local_compiler->locals->ids[index] == constant_id) {
|
||||
return sizet2int(local_compiler->locals->size - index + 2);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Parse specific VALUEs from strings */
|
||||
/******************************************************************************/
|
||||
|
||||
static VALUE
|
||||
parse_number(const char *start, const char *end) {
|
||||
size_t length = end - start;
|
||||
|
||||
char *buffer = alloca(length + 1);
|
||||
memcpy(buffer, start, length);
|
||||
|
||||
buffer[length] = '\0';
|
||||
return rb_cstr_to_inum(buffer, -10, Qfalse);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
parse_string(yp_string_t *string) {
|
||||
return rb_str_new(yp_string_source(string), yp_string_length(string));
|
||||
}
|
||||
|
||||
static inline ID
|
||||
parse_symbol(const char *start, const char *end) {
|
||||
return rb_intern2(start, end - start);
|
||||
}
|
||||
|
||||
static inline ID
|
||||
parse_location_symbol(yp_location_t *location) {
|
||||
return parse_symbol(location->start, location->end);
|
||||
}
|
||||
|
||||
static inline ID
|
||||
parse_node_symbol(yp_node_t *node) {
|
||||
return parse_symbol(node->location.start, node->location.end);
|
||||
}
|
||||
|
||||
static inline ID
|
||||
parse_string_symbol(yp_string_t *string) {
|
||||
const char *start = yp_string_source(string);
|
||||
return parse_symbol(start, start + yp_string_length(string));
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Create Ruby objects for compilation */
|
||||
/******************************************************************************/
|
||||
|
||||
static VALUE
|
||||
yp_iseq_new(yp_iseq_compiler_t *compiler) {
|
||||
VALUE code_location = rb_ary_new_capa(4);
|
||||
rb_ary_push(code_location, INT2FIX(1));
|
||||
rb_ary_push(code_location, INT2FIX(0));
|
||||
rb_ary_push(code_location, INT2FIX(1));
|
||||
rb_ary_push(code_location, INT2FIX(0));
|
||||
|
||||
VALUE data = rb_hash_new();
|
||||
rb_hash_aset(data, ID2SYM(rb_intern("arg_size")), INT2FIX(compiler->arg_size));
|
||||
rb_hash_aset(data, ID2SYM(rb_intern("local_size")), INT2FIX(0));
|
||||
rb_hash_aset(data, ID2SYM(rb_intern("stack_max")), INT2FIX(compiler->stack_max));
|
||||
rb_hash_aset(data, ID2SYM(rb_intern("node_id")), INT2FIX(-1));
|
||||
rb_hash_aset(data, ID2SYM(rb_intern("code_location")), code_location);
|
||||
rb_hash_aset(data, ID2SYM(rb_intern("node_ids")), compiler->node_ids);
|
||||
|
||||
VALUE type = Qnil;
|
||||
switch (compiler->type) {
|
||||
case YP_ISEQ_TYPE_TOP:
|
||||
type = ID2SYM(rb_intern("top"));
|
||||
break;
|
||||
case YP_ISEQ_TYPE_BLOCK:
|
||||
type = ID2SYM(rb_intern("block"));
|
||||
break;
|
||||
}
|
||||
|
||||
VALUE iseq = rb_ary_new_capa(13);
|
||||
rb_ary_push(iseq, rb_str_new_cstr("YARVInstructionSequence/SimpleDataFormat"));
|
||||
rb_ary_push(iseq, INT2FIX(3));
|
||||
rb_ary_push(iseq, INT2FIX(3));
|
||||
rb_ary_push(iseq, INT2FIX(1));
|
||||
rb_ary_push(iseq, data);
|
||||
rb_ary_push(iseq, rb_str_new_cstr(compiler->name));
|
||||
rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));
|
||||
rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));
|
||||
rb_ary_push(iseq, INT2FIX(1));
|
||||
rb_ary_push(iseq, type);
|
||||
rb_ary_push(iseq, rb_ary_new());
|
||||
rb_ary_push(iseq, compiler->optionals);
|
||||
rb_ary_push(iseq, rb_ary_new());
|
||||
rb_ary_push(iseq, compiler->insns);
|
||||
|
||||
return iseq;
|
||||
}
|
||||
|
||||
// Bit flags stored under the "flag" key of a calldata hash. Only the flags
// that the compiler currently emits are defined; the rest are kept here,
// commented out, to document the remaining bit positions.
//
// static const int YP_CALLDATA_ARGS_SPLAT    = 0x0001;
// static const int YP_CALLDATA_ARGS_BLOCKARG = 0x0002;
static const int YP_CALLDATA_FCALL            = 0x0004;
static const int YP_CALLDATA_VCALL            = 0x0008;
static const int YP_CALLDATA_ARGS_SIMPLE      = 0x0010;
// static const int YP_CALLDATA_BLOCKISEQ     = 0x0020;
// static const int YP_CALLDATA_KWARG         = 0x0040;
// static const int YP_CALLDATA_KW_SPLAT      = 0x0080;
// static const int YP_CALLDATA_TAILCALL      = 0x0100;
// static const int YP_CALLDATA_SUPER         = 0x0200;
// static const int YP_CALLDATA_ZSUPER        = 0x0400;
// static const int YP_CALLDATA_OPT_SEND      = 0x0800;
// static const int YP_CALLDATA_KW_SPLAT_MUT  = 0x1000;
|
||||
|
||||
static VALUE
|
||||
yp_calldata_new(ID mid, int flag, size_t orig_argc) {
|
||||
VALUE calldata = rb_hash_new();
|
||||
|
||||
rb_hash_aset(calldata, ID2SYM(rb_intern("mid")), ID2SYM(mid));
|
||||
rb_hash_aset(calldata, ID2SYM(rb_intern("flag")), INT2FIX(flag));
|
||||
rb_hash_aset(calldata, ID2SYM(rb_intern("orig_argc")), INT2FIX(orig_argc));
|
||||
|
||||
return calldata;
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
yp_inline_storage_new(yp_iseq_compiler_t *compiler) {
|
||||
return INT2FIX(compiler->inline_storage_index++);
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Push instructions onto a compiler */
|
||||
/******************************************************************************/
|
||||
|
||||
static VALUE
|
||||
push_insn(yp_iseq_compiler_t *compiler, int stack_change, size_t size, ...) {
|
||||
va_list opnds;
|
||||
va_start(opnds, size);
|
||||
|
||||
VALUE insn = rb_ary_new_capa(size);
|
||||
for (size_t index = 0; index < size; index++) {
|
||||
rb_ary_push(insn, va_arg(opnds, VALUE));
|
||||
}
|
||||
|
||||
va_end(opnds);
|
||||
|
||||
compiler->stack_size += stack_change;
|
||||
if (compiler->stack_size > compiler->stack_max) {
|
||||
compiler->stack_max = compiler->stack_size;
|
||||
}
|
||||
|
||||
compiler->size += size;
|
||||
rb_ary_push(compiler->insns, insn);
|
||||
rb_ary_push(compiler->node_ids, INT2FIX(-1));
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
push_label(yp_iseq_compiler_t *compiler) {
|
||||
VALUE label = ID2SYM(rb_intern_str(rb_sprintf("label_%zu", compiler->size)));
|
||||
rb_ary_push(compiler->insns, label);
|
||||
return label;
|
||||
}
|
||||
|
||||
static void
|
||||
push_ruby_event(yp_iseq_compiler_t *compiler, yp_ruby_event_t event) {
|
||||
switch (event) {
|
||||
case YP_RUBY_EVENT_B_CALL:
|
||||
rb_ary_push(compiler->insns, ID2SYM(rb_intern("RUBY_EVENT_B_CALL")));
|
||||
break;
|
||||
case YP_RUBY_EVENT_B_RETURN:
|
||||
rb_ary_push(compiler->insns, ID2SYM(rb_intern("RUBY_EVENT_B_RETURN")));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_anytostring(yp_iseq_compiler_t *compiler) {
|
||||
return push_insn(compiler, -2 + 1, 1, ID2SYM(rb_intern("anytostring")));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_branchif(yp_iseq_compiler_t *compiler, VALUE label) {
|
||||
return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("branchif")), label);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_branchunless(yp_iseq_compiler_t *compiler, VALUE label) {
|
||||
return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("branchunless")), label);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_concatstrings(yp_iseq_compiler_t *compiler, int count) {
|
||||
return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("concatstrings")), INT2FIX(count));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_dup(yp_iseq_compiler_t *compiler) {
|
||||
return push_insn(compiler, -1 + 2, 1, ID2SYM(rb_intern("dup")));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_getclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
|
||||
return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getclassvariable")), name, inline_storage);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_getconstant(yp_iseq_compiler_t *compiler, VALUE name) {
|
||||
return push_insn(compiler, -2 + 1, 2, ID2SYM(rb_intern("getconstant")), name);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_getglobal(yp_iseq_compiler_t *compiler, VALUE name) {
|
||||
return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("getglobal")), name);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_getinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
|
||||
return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getinstancevariable")), name, inline_storage);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_getlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
|
||||
return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getlocal")), index, depth);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_leave(yp_iseq_compiler_t *compiler) {
|
||||
return push_insn(compiler, -1 + 0, 1, ID2SYM(rb_intern("leave")));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_newarray(yp_iseq_compiler_t *compiler, int count) {
|
||||
return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("newarray")), INT2FIX(count));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_newhash(yp_iseq_compiler_t *compiler, int count) {
|
||||
return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("newhash")), INT2FIX(count));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_newrange(yp_iseq_compiler_t *compiler, VALUE flag) {
|
||||
return push_insn(compiler, -2 + 1, 2, ID2SYM(rb_intern("newrange")), flag);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_nop(yp_iseq_compiler_t *compiler) {
|
||||
return push_insn(compiler, -2 + 1, 1, ID2SYM(rb_intern("nop")));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_objtostring(yp_iseq_compiler_t *compiler, VALUE calldata) {
|
||||
return push_insn(compiler, -1 + 1, 2, ID2SYM(rb_intern("objtostring")), calldata);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_pop(yp_iseq_compiler_t *compiler) {
|
||||
return push_insn(compiler, -1 + 0, 1, ID2SYM(rb_intern("pop")));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_putnil(yp_iseq_compiler_t *compiler) {
|
||||
return push_insn(compiler, -0 + 1, 1, ID2SYM(rb_intern("putnil")));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_putobject(yp_iseq_compiler_t *compiler, VALUE value) {
|
||||
return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putobject")), value);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_putself(yp_iseq_compiler_t *compiler) {
|
||||
return push_insn(compiler, -0 + 1, 1, ID2SYM(rb_intern("putself")));
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_setlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
|
||||
return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setlocal")), index, depth);
|
||||
}
|
||||
|
||||
// Operands accepted by `putspecialobject`. Only the ones that the compiler
// emits are defined; the remaining one is kept commented out for reference.
static const VALUE YP_SPECIALOBJECT_VMCORE        = INT2FIX(1);
static const VALUE YP_SPECIALOBJECT_CBASE         = INT2FIX(2);
// static const VALUE YP_SPECIALOBJECT_CONST_BASE = INT2FIX(3);
|
||||
|
||||
static inline VALUE
|
||||
push_putspecialobject(yp_iseq_compiler_t *compiler, VALUE object) {
|
||||
return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putspecialobject")), object);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_putstring(yp_iseq_compiler_t *compiler, VALUE string) {
|
||||
return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putstring")), string);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_send(yp_iseq_compiler_t *compiler, int stack_change, VALUE calldata, VALUE block_iseq) {
|
||||
return push_insn(compiler, stack_change, 3, ID2SYM(rb_intern("send")), calldata, block_iseq);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_setclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
|
||||
return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setclassvariable")), name, inline_storage);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_setglobal(yp_iseq_compiler_t *compiler, VALUE name) {
|
||||
return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("setglobal")), name);
|
||||
}
|
||||
|
||||
static inline VALUE
|
||||
push_setinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
|
||||
return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setinstancevariable")), name, inline_storage);
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Compile an AST node using the given compiler */
|
||||
/******************************************************************************/
|
||||
|
||||
static void
|
||||
yp_compile_node(yp_iseq_compiler_t *compiler, yp_node_t *base_node) {
|
||||
switch (base_node->type) {
|
||||
case YP_NODE_ALIAS_NODE: {
|
||||
yp_alias_node_t *node = (yp_alias_node_t *) base_node;
|
||||
|
||||
push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
|
||||
push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
|
||||
yp_compile_node(compiler, node->new_name);
|
||||
yp_compile_node(compiler, node->old_name);
|
||||
push_send(compiler, -3, yp_calldata_new(rb_intern("core#set_method_alias"), YP_CALLDATA_ARGS_SIMPLE, 3), Qnil);
|
||||
|
||||
return;
|
||||
}
|
||||
case YP_NODE_AND_NODE: {
|
||||
yp_and_node_t *node = (yp_and_node_t *) base_node;
|
||||
|
||||
yp_compile_node(compiler, node->left);
|
||||
push_dup(compiler);
|
||||
VALUE branchunless = push_branchunless(compiler, Qnil);
|
||||
|
||||
push_pop(compiler);
|
||||
yp_compile_node(compiler, node->right);
|
||||
|
||||
VALUE label = push_label(compiler);
|
||||
rb_ary_store(branchunless, 1, label);
|
||||
|
||||
return;
|
||||
}
|
||||
case YP_NODE_ARGUMENTS_NODE: {
|
||||
yp_arguments_node_t *node = (yp_arguments_node_t *) base_node;
|
||||
yp_node_list_t node_list = node->arguments;
|
||||
for (size_t index = 0; index < node_list.size; index++) {
|
||||
yp_compile_node(compiler, node_list.nodes[index]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
case YP_NODE_ARRAY_NODE: {
|
||||
yp_array_node_t *node = (yp_array_node_t *) base_node;
|
||||
yp_node_list_t elements = node->elements;
|
||||
for (size_t index = 0; index < elements.size; index++) {
|
||||
yp_compile_node(compiler, elements.nodes[index]);
|
||||
}
|
||||
push_newarray(compiler, sizet2int(elements.size));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_ASSOC_NODE: {
|
||||
yp_assoc_node_t *node = (yp_assoc_node_t *) base_node;
|
||||
yp_compile_node(compiler, node->key);
|
||||
yp_compile_node(compiler, node->value);
|
||||
return;
|
||||
}
|
||||
case YP_NODE_BLOCK_NODE: {
|
||||
yp_block_node_t *node = (yp_block_node_t *) base_node;
|
||||
|
||||
VALUE optional_labels = rb_ary_new();
|
||||
if (node->parameters &&
|
||||
node->parameters->parameters &&
|
||||
node->parameters->parameters->optionals.size > 0) {
|
||||
compiler->arg_size += node->parameters->parameters->optionals.size;
|
||||
|
||||
yp_node_list_t *optionals = &node->parameters->parameters->optionals;
|
||||
for (size_t i = 0; i < optionals->size; i++) {
|
||||
VALUE label = push_label(compiler);
|
||||
rb_ary_push(optional_labels, label);
|
||||
yp_compile_node(compiler, optionals->nodes[i]);
|
||||
}
|
||||
VALUE label = push_label(compiler);
|
||||
rb_ary_push(optional_labels, label);
|
||||
rb_hash_aset(compiler->optionals, ID2SYM(rb_intern("opt")), optional_labels);
|
||||
|
||||
push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
|
||||
push_nop(compiler);
|
||||
} else {
|
||||
push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (node->statements) {
|
||||
yp_compile_node(compiler, node->statements);
|
||||
} else {
|
||||
push_putnil(compiler);
|
||||
}
|
||||
push_ruby_event(compiler, YP_RUBY_EVENT_B_RETURN);
|
||||
push_leave(compiler);
|
||||
return;
|
||||
}
|
||||
case YP_NODE_CALL_NODE: {
|
||||
yp_call_node_t *node = (yp_call_node_t *) base_node;
|
||||
|
||||
ID mid = parse_location_symbol(&node->message_loc);
|
||||
int flags = 0;
|
||||
size_t orig_argc;
|
||||
|
||||
if (node->receiver == NULL) {
|
||||
push_putself(compiler);
|
||||
} else {
|
||||
yp_compile_node(compiler, node->receiver);
|
||||
}
|
||||
|
||||
if (node->arguments == NULL) {
|
||||
if (flags & YP_CALLDATA_FCALL) flags |= YP_CALLDATA_VCALL;
|
||||
orig_argc = 0;
|
||||
} else {
|
||||
yp_arguments_node_t *arguments = node->arguments;
|
||||
yp_compile_node(compiler, (yp_node_t *) arguments);
|
||||
orig_argc = arguments->arguments.size;
|
||||
}
|
||||
|
||||
VALUE block_iseq = Qnil;
|
||||
if (node->block != NULL) {
|
||||
yp_iseq_compiler_t block_compiler;
|
||||
yp_iseq_compiler_init(
|
||||
&block_compiler,
|
||||
compiler,
|
||||
&node->block->locals,
|
||||
"block in <compiled>",
|
||||
YP_ISEQ_TYPE_BLOCK
|
||||
);
|
||||
|
||||
yp_compile_node(&block_compiler, (yp_node_t *) node->block);
|
||||
block_iseq = yp_iseq_new(&block_compiler);
|
||||
}
|
||||
|
||||
if (block_iseq == Qnil && flags == 0) {
|
||||
flags |= YP_CALLDATA_ARGS_SIMPLE;
|
||||
}
|
||||
|
||||
if (node->receiver == NULL) {
|
||||
flags |= YP_CALLDATA_FCALL;
|
||||
|
||||
if (block_iseq == Qnil && node->arguments == NULL) {
|
||||
flags |= YP_CALLDATA_VCALL;
|
||||
}
|
||||
}
|
||||
|
||||
push_send(compiler, -sizet2int(orig_argc), yp_calldata_new(mid, flags, orig_argc), block_iseq);
|
||||
return;
|
||||
}
|
||||
case YP_NODE_CLASS_VARIABLE_READ_NODE: {
|
||||
yp_class_variable_read_node_t *node = (yp_class_variable_read_node_t *) base_node;
|
||||
push_getclassvariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node)), yp_inline_storage_new(compiler));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_CLASS_VARIABLE_WRITE_NODE: {
|
||||
yp_class_variable_write_node_t *node = (yp_class_variable_write_node_t *) base_node;
|
||||
if (node->value == NULL) {
|
||||
rb_raise(rb_eNotImpError, "class variable write without value not implemented");
|
||||
}
|
||||
|
||||
yp_compile_node(compiler, node->value);
|
||||
push_dup(compiler);
|
||||
push_setclassvariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_CONSTANT_PATH_NODE: {
|
||||
yp_constant_path_node_t *node = (yp_constant_path_node_t *) base_node;
|
||||
yp_compile_node(compiler, node->parent);
|
||||
push_putobject(compiler, Qfalse);
|
||||
push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node->child)));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_CONSTANT_READ_NODE:
|
||||
push_putnil(compiler);
|
||||
push_putobject(compiler, Qtrue);
|
||||
push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)));
|
||||
return;
|
||||
case YP_NODE_EMBEDDED_STATEMENTS_NODE: {
|
||||
yp_embedded_statements_node_t *node = (yp_embedded_statements_node_t *) base_node;
|
||||
yp_compile_node(compiler, (yp_node_t *) node->statements);
|
||||
return;
|
||||
}
|
||||
case YP_NODE_FALSE_NODE:
|
||||
push_putobject(compiler, Qfalse);
|
||||
return;
|
||||
case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
|
||||
push_getglobal(compiler, ID2SYM(parse_location_symbol(&base_node->location)));
|
||||
return;
|
||||
case YP_NODE_GLOBAL_VARIABLE_WRITE_NODE: {
|
||||
yp_global_variable_write_node_t *node = (yp_global_variable_write_node_t *) base_node;
|
||||
|
||||
if (node->value == NULL) {
|
||||
rb_raise(rb_eNotImpError, "global variable write without value not implemented");
|
||||
}
|
||||
|
||||
yp_compile_node(compiler, node->value);
|
||||
push_dup(compiler);
|
||||
push_setglobal(compiler, ID2SYM(parse_location_symbol(&node->name_loc)));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_HASH_NODE: {
|
||||
yp_hash_node_t *node = (yp_hash_node_t *) base_node;
|
||||
yp_node_list_t elements = node->elements;
|
||||
|
||||
for (size_t index = 0; index < elements.size; index++) {
|
||||
yp_compile_node(compiler, elements.nodes[index]);
|
||||
}
|
||||
|
||||
push_newhash(compiler, sizet2int(elements.size * 2));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
|
||||
push_getinstancevariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)), yp_inline_storage_new(compiler));
|
||||
return;
|
||||
case YP_NODE_INSTANCE_VARIABLE_WRITE_NODE: {
|
||||
yp_instance_variable_write_node_t *node = (yp_instance_variable_write_node_t *) base_node;
|
||||
|
||||
if (node->value == NULL) {
|
||||
rb_raise(rb_eNotImpError, "instance variable write without value not implemented");
|
||||
}
|
||||
|
||||
yp_compile_node(compiler, node->value);
|
||||
push_dup(compiler);
|
||||
push_setinstancevariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_INTEGER_NODE:
|
||||
push_putobject(compiler, parse_number(base_node->location.start, base_node->location.end));
|
||||
return;
|
||||
case YP_NODE_INTERPOLATED_STRING_NODE: {
|
||||
yp_interpolated_string_node_t *node = (yp_interpolated_string_node_t *) base_node;
|
||||
|
||||
for (size_t index = 0; index < node->parts.size; index++) {
|
||||
yp_node_t *part = node->parts.nodes[index];
|
||||
|
||||
switch (part->type) {
|
||||
case YP_NODE_STRING_NODE: {
|
||||
yp_string_node_t *string_node = (yp_string_node_t *) part;
|
||||
push_putobject(compiler, parse_string(&string_node->unescaped));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
yp_compile_node(compiler, part);
|
||||
push_dup(compiler);
|
||||
push_objtostring(compiler, yp_calldata_new(rb_intern("to_s"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 0));
|
||||
push_anytostring(compiler);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
push_concatstrings(compiler, sizet2int(node->parts.size));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_KEYWORD_HASH_NODE: {
|
||||
yp_keyword_hash_node_t *node = (yp_keyword_hash_node_t *) base_node;
|
||||
yp_node_list_t elements = node->elements;
|
||||
|
||||
for (size_t index = 0; index < elements.size; index++) {
|
||||
yp_compile_node(compiler, elements.nodes[index]);
|
||||
}
|
||||
|
||||
push_newhash(compiler, sizet2int(elements.size * 2));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
|
||||
yp_local_variable_read_node_t *node = (yp_local_variable_read_node_t *) base_node;
|
||||
int index = local_index(compiler, node->constant_id, node->depth);
|
||||
|
||||
push_getlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
|
||||
yp_local_variable_write_node_t *node = (yp_local_variable_write_node_t *) base_node;
|
||||
|
||||
if (node->value == NULL) {
|
||||
rb_raise(rb_eNotImpError, "local variable write without value not implemented");
|
||||
}
|
||||
|
||||
int index = local_index(compiler, node->constant_id, node->depth);
|
||||
|
||||
yp_compile_node(compiler, node->value);
|
||||
push_dup(compiler);
|
||||
push_setlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_NIL_NODE:
|
||||
push_putnil(compiler);
|
||||
return;
|
||||
case YP_NODE_OR_NODE: {
|
||||
yp_or_node_t *node = (yp_or_node_t *) base_node;
|
||||
|
||||
yp_compile_node(compiler, node->left);
|
||||
push_dup(compiler);
|
||||
VALUE branchif = push_branchif(compiler, Qnil);
|
||||
|
||||
push_pop(compiler);
|
||||
yp_compile_node(compiler, node->right);
|
||||
|
||||
VALUE label = push_label(compiler);
|
||||
rb_ary_store(branchif, 1, label);
|
||||
|
||||
return;
|
||||
}
|
||||
case YP_NODE_PARENTHESES_NODE: {
|
||||
yp_parentheses_node_t *node = (yp_parentheses_node_t *) base_node;
|
||||
|
||||
if (node->statements == NULL) {
|
||||
push_putnil(compiler);
|
||||
} else {
|
||||
yp_compile_node(compiler, node->statements);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
case YP_NODE_PROGRAM_NODE: {
|
||||
yp_program_node_t *node = (yp_program_node_t *) base_node;
|
||||
|
||||
if (node->statements->body.size == 0) {
|
||||
push_putnil(compiler);
|
||||
} else {
|
||||
yp_compile_node(compiler, (yp_node_t *) node->statements);
|
||||
}
|
||||
|
||||
push_leave(compiler);
|
||||
return;
|
||||
}
|
||||
case YP_NODE_RANGE_NODE: {
|
||||
yp_range_node_t *node = (yp_range_node_t *) base_node;
|
||||
|
||||
if (node->left == NULL) {
|
||||
push_putnil(compiler);
|
||||
} else {
|
||||
yp_compile_node(compiler, node->left);
|
||||
}
|
||||
|
||||
if (node->right == NULL) {
|
||||
push_putnil(compiler);
|
||||
} else {
|
||||
yp_compile_node(compiler, node->right);
|
||||
}
|
||||
|
||||
push_newrange(compiler, INT2FIX((node->operator_loc.end - node->operator_loc.start) == 3));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_SELF_NODE:
|
||||
push_putself(compiler);
|
||||
return;
|
||||
case YP_NODE_STATEMENTS_NODE: {
|
||||
yp_statements_node_t *node = (yp_statements_node_t *) base_node;
|
||||
yp_node_list_t node_list = node->body;
|
||||
for (size_t index = 0; index < node_list.size; index++) {
|
||||
yp_compile_node(compiler, node_list.nodes[index]);
|
||||
if (index < node_list.size - 1) push_pop(compiler);
|
||||
}
|
||||
return;
|
||||
}
|
||||
case YP_NODE_STRING_NODE: {
|
||||
yp_string_node_t *node = (yp_string_node_t *) base_node;
|
||||
push_putstring(compiler, parse_string(&node->unescaped));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_SYMBOL_NODE: {
|
||||
yp_symbol_node_t *node = (yp_symbol_node_t *) base_node;
|
||||
push_putobject(compiler, ID2SYM(parse_string_symbol(&node->unescaped)));
|
||||
return;
|
||||
}
|
||||
case YP_NODE_TRUE_NODE:
|
||||
push_putobject(compiler, Qtrue);
|
||||
return;
|
||||
case YP_NODE_UNDEF_NODE: {
|
||||
yp_undef_node_t *node = (yp_undef_node_t *) base_node;
|
||||
|
||||
for (size_t index = 0; index < node->names.size; index++) {
|
||||
push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
|
||||
push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
|
||||
yp_compile_node(compiler, node->names.nodes[index]);
|
||||
push_send(compiler, -2, yp_calldata_new(rb_intern("core#undef_method"), YP_CALLDATA_ARGS_SIMPLE, 2), Qnil);
|
||||
|
||||
if (index < node->names.size - 1) push_pop(compiler);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
case YP_NODE_X_STRING_NODE: {
|
||||
yp_x_string_node_t *node = (yp_x_string_node_t *) base_node;
|
||||
push_putself(compiler);
|
||||
push_putobject(compiler, parse_string(&node->unescaped));
|
||||
push_send(compiler, -1, yp_calldata_new(rb_intern("`"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 1), Qnil);
|
||||
return;
|
||||
}
|
||||
case YP_NODE_OPTIONAL_PARAMETER_NODE: {
|
||||
yp_optional_parameter_node_t *node = (yp_optional_parameter_node_t *) base_node;
|
||||
int depth = 0;
|
||||
int index = local_index(compiler, node->constant_id, depth);
|
||||
yp_compile_node(compiler, node->value);
|
||||
push_setlocal(compiler, INT2FIX(index), INT2FIX(depth));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
rb_raise(rb_eNotImpError, "node type %d not implemented", base_node->type);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// This function compiles the given node into a list of instructions.
|
||||
VALUE
|
||||
yp_compile(yp_node_t *node) {
|
||||
assert(node->type == YP_NODE_PROGRAM_NODE);
|
||||
|
||||
yp_iseq_compiler_t compiler;
|
||||
yp_iseq_compiler_init(
|
||||
&compiler,
|
||||
NULL,
|
||||
&((yp_program_node_t *) node)->locals,
|
||||
"<compiled>",
|
||||
YP_ISEQ_TYPE_TOP
|
||||
);
|
||||
|
||||
yp_compile_node(&compiler, node);
|
||||
return yp_iseq_new(&compiler);
|
||||
}
|
1
yarp/config.h
Normal file
1
yarp/config.h
Normal file
@ -0,0 +1 @@
|
||||
#include "ruby/config.h"
|
@ -1,8 +1,20 @@
|
||||
#ifndef YARP_DEFINES_H
|
||||
#define YARP_DEFINES_H
|
||||
|
||||
// This file should be included first by any *.h or *.c in YARP
|
||||
|
||||
#include "yarp/config.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// YP_EXPORTED_FUNCTION
|
||||
#if defined(_WIN32)
|
||||
#if defined(YP_STATIC)
|
||||
# define YP_EXPORTED_FUNCTION
|
||||
#elif defined(_WIN32)
|
||||
# define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
|
||||
#else
|
||||
# ifndef YP_EXPORTED_FUNCTION
|
||||
@ -16,9 +28,9 @@
|
||||
|
||||
// YP_ATTRIBUTE_UNUSED
|
||||
#if defined(__GNUC__)
|
||||
# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
|
||||
# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
|
||||
#else
|
||||
# define YP_ATTRIBUTE_UNUSED
|
||||
# define YP_ATTRIBUTE_UNUSED
|
||||
#endif
|
||||
|
||||
// inline
|
||||
@ -26,4 +38,13 @@
|
||||
# define inline __inline
|
||||
#endif
|
||||
|
||||
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
|
||||
|
||||
int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
|
||||
|
||||
#if defined(HAVE_SNPRINTF)
|
||||
// We use snprintf if it's available
|
||||
# define yp_snprintf snprintf
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -2,12 +2,11 @@
|
||||
#define YARP_DIAGNOSTIC_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
#include "yarp/util/yp_list.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "yarp/util/yp_list.h"
|
||||
|
||||
// This struct represents a diagnostic found during parsing.
|
||||
typedef struct {
|
||||
yp_list_node_t node;
|
||||
|
@ -51,7 +51,8 @@ yp_encoding_t yp_encoding_ascii = {
|
||||
.char_width = yp_encoding_ascii_char_width,
|
||||
.alnum_char = yp_encoding_ascii_alnum_char,
|
||||
.alpha_char = yp_encoding_ascii_alpha_char,
|
||||
.isupper_char = yp_encoding_ascii_isupper_char
|
||||
.isupper_char = yp_encoding_ascii_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
||||
yp_encoding_t yp_encoding_ascii_8bit = {
|
||||
@ -60,4 +61,5 @@ yp_encoding_t yp_encoding_ascii_8bit = {
|
||||
.alnum_char = yp_encoding_ascii_alnum_char,
|
||||
.alpha_char = yp_encoding_ascii_alpha_char,
|
||||
.isupper_char = yp_encoding_ascii_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -74,5 +74,6 @@ yp_encoding_t yp_encoding_big5 = {
|
||||
.char_width = yp_encoding_big5_char_width,
|
||||
.alnum_char = yp_encoding_big5_alnum_char,
|
||||
.alpha_char = yp_encoding_big5_alpha_char,
|
||||
.isupper_char = yp_encoding_big5_isupper_char
|
||||
.isupper_char = yp_encoding_big5_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
@ -12,11 +12,28 @@
|
||||
// Each callback should return the number of bytes, or 0 if the next bytes are
|
||||
// invalid for the encoding and type.
|
||||
typedef struct {
|
||||
const char *name;
|
||||
// Return the number of bytes that the next character takes if it is valid
|
||||
// in the encoding.
|
||||
size_t (*char_width)(const char *c);
|
||||
|
||||
// Return the number of bytes that the next character takes if it is valid
|
||||
// in the encoding and is alphabetical.
|
||||
size_t (*alpha_char)(const char *c);
|
||||
|
||||
// Return the number of bytes that the next character takes if it is valid
|
||||
// in the encoding and is alphanumeric.
|
||||
size_t (*alnum_char)(const char *c);
|
||||
|
||||
// Return true if the next character is valid in the encoding and is an
|
||||
// uppercase character.
|
||||
bool (*isupper_char)(const char *c);
|
||||
|
||||
// The name of the encoding. This should correspond to a value that can be
|
||||
// passed to Encoding.find in Ruby.
|
||||
const char *name;
|
||||
|
||||
// True if the encoding is a multibyte encoding.
|
||||
bool multibyte;
|
||||
} yp_encoding_t;
|
||||
|
||||
// These bits define the location of each bit of metadata within the various
|
||||
|
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_euc_jp = {
|
||||
.char_width = yp_encoding_euc_jp_char_width,
|
||||
.alnum_char = yp_encoding_euc_jp_alnum_char,
|
||||
.alpha_char = yp_encoding_euc_jp_alpha_char,
|
||||
.isupper_char = yp_encoding_euc_jp_isupper_char
|
||||
.isupper_char = yp_encoding_euc_jp_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
@ -80,5 +80,6 @@ yp_encoding_t yp_encoding_gbk = {
|
||||
.char_width = yp_encoding_gbk_char_width,
|
||||
.alnum_char = yp_encoding_gbk_alnum_char,
|
||||
.alpha_char = yp_encoding_gbk_alpha_char,
|
||||
.isupper_char = yp_encoding_gbk_isupper_char
|
||||
.isupper_char = yp_encoding_gbk_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_1 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_1_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_1_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_1_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_1_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_10 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_10_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_10_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_10_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_10_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_11 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_11_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_11_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_11_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_11_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_13 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_13_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_13_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_13_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_13_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_14 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_14_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_14_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_14_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_14_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_15 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_15_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_15_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_15_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_15_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_16 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_16_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_16_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_16_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_16_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_2 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_2_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_2_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_2_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_2_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_3 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_3_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_3_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_3_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_3_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_4 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_4_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_4_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_4_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_4_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_5 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_5_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_5_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_5_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_5_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_6 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_6_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_6_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_6_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_6_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_7 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_7_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_7_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_7_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_7_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_8 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_8_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_8_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_8_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_8_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_9 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_iso_8859_9_alnum_char,
|
||||
.alpha_char = yp_encoding_iso_8859_9_alpha_char,
|
||||
.isupper_char = yp_encoding_iso_8859_9_isupper_char
|
||||
.isupper_char = yp_encoding_iso_8859_9_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -51,5 +51,6 @@ yp_encoding_t yp_encoding_koi8_r = {
|
||||
.char_width = yp_encoding_koi8_r_char_width,
|
||||
.alnum_char = yp_encoding_koi8_r_alnum_char,
|
||||
.alpha_char = yp_encoding_koi8_r_alpha_char,
|
||||
.isupper_char = yp_encoding_koi8_r_isupper_char
|
||||
.isupper_char = yp_encoding_koi8_r_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_shift_jis = {
|
||||
.char_width = yp_encoding_shift_jis_char_width,
|
||||
.alnum_char = yp_encoding_shift_jis_alnum_char,
|
||||
.alpha_char = yp_encoding_shift_jis_alpha_char,
|
||||
.isupper_char = yp_encoding_shift_jis_isupper_char
|
||||
.isupper_char = yp_encoding_shift_jis_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
@ -2230,7 +2230,7 @@ utf_8_codepoint(const unsigned char *c, size_t *width) {
|
||||
|
||||
codepoint = (state != 0) ?
|
||||
(byte & 0x3fu) | (codepoint << 6) :
|
||||
(0xff >> type) & (byte);
|
||||
(0xffu >> type) & (byte);
|
||||
|
||||
state = utf_8_dfa[256 + (state * 16) + type];
|
||||
if (!state) {
|
||||
@ -2312,5 +2312,6 @@ yp_encoding_t yp_encoding_utf_8 = {
|
||||
.char_width = yp_encoding_utf_8_char_width,
|
||||
.alnum_char = yp_encoding_utf_8_alnum_char,
|
||||
.alpha_char = yp_encoding_utf_8_alpha_char,
|
||||
.isupper_char = yp_encoding_utf_8_isupper_char
|
||||
.isupper_char = yp_encoding_utf_8_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1251 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_windows_1251_alnum_char,
|
||||
.alpha_char = yp_encoding_windows_1251_alpha_char,
|
||||
.isupper_char = yp_encoding_windows_1251_isupper_char
|
||||
.isupper_char = yp_encoding_windows_1251_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1252 = {
|
||||
.char_width = yp_encoding_single_char_width,
|
||||
.alnum_char = yp_encoding_windows_1252_alnum_char,
|
||||
.alpha_char = yp_encoding_windows_1252_alpha_char,
|
||||
.isupper_char = yp_encoding_windows_1252_isupper_char
|
||||
.isupper_char = yp_encoding_windows_1252_isupper_char,
|
||||
.multibyte = false
|
||||
};
|
||||
|
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_windows_31j = {
|
||||
.char_width = yp_encoding_windows_31j_char_width,
|
||||
.alnum_char = yp_encoding_windows_31j_alnum_char,
|
||||
.alpha_char = yp_encoding_windows_31j_alpha_char,
|
||||
.isupper_char = yp_encoding_windows_31j_isupper_char
|
||||
.isupper_char = yp_encoding_windows_31j_isupper_char,
|
||||
.multibyte = true
|
||||
};
|
||||
|
459
yarp/extension.c
459
yarp/extension.c
@ -1,6 +1,7 @@
|
||||
#include "yarp/extension.h"
|
||||
|
||||
VALUE rb_cYARP;
|
||||
VALUE rb_cYARPSource;
|
||||
VALUE rb_cYARPToken;
|
||||
VALUE rb_cYARPLocation;
|
||||
|
||||
@ -9,51 +10,97 @@ VALUE rb_cYARPParseError;
|
||||
VALUE rb_cYARPParseWarning;
|
||||
VALUE rb_cYARPParseResult;
|
||||
|
||||
// Represents a source of Ruby code. It can either be coming from a file or a
|
||||
// string. If it's a file, it's going to mmap the contents of the file. If it's
|
||||
// a string it's going to just point to the contents of the string.
|
||||
/******************************************************************************/
|
||||
/* IO of Ruby code */
|
||||
/******************************************************************************/
|
||||
|
||||
// Represents an input of Ruby code. It can either be coming from a file or a
|
||||
// string. If it's a file, we'll use demand paging to read the contents of the
|
||||
// file into a string. If it's already a string, we'll reference it directly.
|
||||
typedef struct {
|
||||
enum { SOURCE_FILE, SOURCE_STRING } type;
|
||||
const char *source;
|
||||
size_t size;
|
||||
} source_t;
|
||||
} input_t;
|
||||
|
||||
// Check if the given filepath is a string. If it's nil, then return NULL. If
|
||||
// it's not a string, then raise a type error. Otherwise return the filepath as
|
||||
// a C string.
|
||||
static const char *
|
||||
check_filepath(VALUE filepath) {
|
||||
// If the filepath is nil, then we don't need to do anything.
|
||||
if (NIL_P(filepath)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Check if the filepath is a string. If it's not, then raise a type error.
|
||||
if (!RB_TYPE_P(filepath, T_STRING)) {
|
||||
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
|
||||
}
|
||||
|
||||
// Otherwise, return the filepath as a C string.
|
||||
return StringValueCStr(filepath);
|
||||
}
|
||||
|
||||
// Read the file indicated by the filepath parameter into source and load its
|
||||
// contents and size into the given source_t.
|
||||
// contents and size into the given input_t.
|
||||
//
|
||||
// We want to use demand paging as much as possible in order to avoid having to
|
||||
// read the entire file into memory (which could be detrimental to performance
|
||||
// for large files). This means that if we're on Windows we'll use
|
||||
// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
|
||||
// `mmap`, and on other POSIX systems we'll use `read`.
|
||||
static int
|
||||
source_file_load(source_t *source, VALUE filepath) {
|
||||
input_load_filepath(input_t *input, const char *filepath) {
|
||||
#ifdef _WIN32
|
||||
HANDLE file = CreateFile(
|
||||
StringValueCStr(filepath),
|
||||
GENERIC_READ,
|
||||
0,
|
||||
NULL,
|
||||
OPEN_EXISTING,
|
||||
FILE_ATTRIBUTE_NORMAL,
|
||||
NULL
|
||||
);
|
||||
// Open the file for reading.
|
||||
HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
||||
|
||||
if (file == INVALID_HANDLE_VALUE) {
|
||||
perror("Invalid handle for file");
|
||||
perror("CreateFile failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Get the file size.
|
||||
DWORD file_size = GetFileSize(file, NULL);
|
||||
source->source = malloc(file_size);
|
||||
if (file_size == INVALID_FILE_SIZE) {
|
||||
CloseHandle(file);
|
||||
perror("GetFileSize failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
DWORD bytes_read;
|
||||
BOOL success = ReadFile(file, DISCARD_CONST_QUAL(void *, source->source), file_size, &bytes_read, NULL);
|
||||
// If the file is empty, then we don't need to do anything else, we'll set
|
||||
// the source to a constant empty string and return.
|
||||
if (!file_size) {
|
||||
CloseHandle(file);
|
||||
input->size = 0;
|
||||
input->source = "";
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Create a mapping of the file.
|
||||
HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
if (mapping == NULL) {
|
||||
CloseHandle(file);
|
||||
perror("CreateFileMapping failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Map the file into memory.
|
||||
input->source = (const char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
|
||||
CloseHandle(mapping);
|
||||
CloseHandle(file);
|
||||
|
||||
if (!success) {
|
||||
perror("ReadFile failed");
|
||||
if (input->source == NULL) {
|
||||
perror("MapViewOfFile failed");
|
||||
return 1;
|
||||
}
|
||||
|
||||
source->size = (size_t) file_size;
|
||||
// Set the size of the source.
|
||||
input->size = (size_t) file_size;
|
||||
return 0;
|
||||
#else
|
||||
// Open the file for reading
|
||||
int fd = open(StringValueCStr(filepath), O_RDONLY);
|
||||
int fd = open(filepath, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
perror("open");
|
||||
return 1;
|
||||
@ -68,30 +115,30 @@ source_file_load(source_t *source, VALUE filepath) {
|
||||
}
|
||||
|
||||
// mmap the file descriptor to virtually get the contents
|
||||
source->size = sb.st_size;
|
||||
input->size = sb.st_size;
|
||||
|
||||
#ifdef HAVE_MMAP
|
||||
if (!source->size) {
|
||||
if (!input->size) {
|
||||
close(fd);
|
||||
source->source = "";
|
||||
input->source = "";
|
||||
return 0;
|
||||
}
|
||||
|
||||
char * res = mmap(NULL, source->size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (res == MAP_FAILED) {
|
||||
const char *result = mmap(NULL, input->size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (result == MAP_FAILED) {
|
||||
perror("Map failed");
|
||||
return 1;
|
||||
} else {
|
||||
source->source = res;
|
||||
input->source = result;
|
||||
}
|
||||
#else
|
||||
source->source = malloc(source->size);
|
||||
if (source->source == NULL) return 1;
|
||||
input->source = malloc(input->size);
|
||||
if (input->source == NULL) return 1;
|
||||
|
||||
ssize_t read_size = read(fd, (void *)source->source, source->size);
|
||||
if (read_size < 0 || (size_t)read_size != source->size) {
|
||||
ssize_t read_size = read(fd, (void *) input->source, input->size);
|
||||
if (read_size < 0 || (size_t)read_size != input->size) {
|
||||
perror("Read size is incorrect");
|
||||
free((void *)source->source);
|
||||
free((void *) input->source);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
@ -101,86 +148,106 @@ source_file_load(source_t *source, VALUE filepath) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Load the contents and size of the given string into the given source_t.
|
||||
// Load the contents and size of the given string into the given input_t.
|
||||
static void
|
||||
source_string_load(source_t *source, VALUE string) {
|
||||
*source = (source_t) {
|
||||
.type = SOURCE_STRING,
|
||||
.source = RSTRING_PTR(string),
|
||||
.size = RSTRING_LEN(string),
|
||||
};
|
||||
input_load_string(input_t *input, VALUE string) {
|
||||
// Check if the given value is a string. If it's not, then raise a type error.
|
||||
if (!RB_TYPE_P(string, T_STRING)) {
|
||||
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
|
||||
}
|
||||
|
||||
input->source = RSTRING_PTR(string);
|
||||
input->size = RSTRING_LEN(string);
|
||||
}
|
||||
|
||||
// Free any resources associated with the given source_t.
|
||||
// Free any resources associated with the given input_t. This is the counterpart
|
||||
// function to input_load_filepath. It will unmap the file if it was mapped, or
|
||||
// free the memory if it was allocated.
|
||||
static void
|
||||
source_file_unload(source_t *source) {
|
||||
#ifdef _WIN32
|
||||
free((void *)source->source);
|
||||
input_unload_filepath(input_t *input) {
|
||||
// We don't need to free anything with 0 sized files because we handle that
|
||||
// with a constant string instead.
|
||||
if (!input->size) return;
|
||||
void *memory = (void *) input->source;
|
||||
|
||||
#if defined(_WIN32)
|
||||
UnmapViewOfFile(memory);
|
||||
#elif defined(HAVE_MMAP)
|
||||
munmap(memory, input->size);
|
||||
#else
|
||||
#ifdef HAVE_MMAP
|
||||
munmap((void *)source->source, source->size);
|
||||
#else
|
||||
free((void *)source->source);
|
||||
#endif
|
||||
free(memory);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Dump the AST corresponding to the given source to a string.
|
||||
/******************************************************************************/
|
||||
/* Serializing the AST */
|
||||
/******************************************************************************/
|
||||
|
||||
// Dump the AST corresponding to the given input to a string.
|
||||
static VALUE
|
||||
dump_source(source_t *source, const char *filepath) {
|
||||
dump_input(input_t *input, const char *filepath) {
|
||||
yp_buffer_t buffer;
|
||||
if (!yp_buffer_init(&buffer)) {
|
||||
rb_raise(rb_eNoMemError, "failed to allocate memory");
|
||||
}
|
||||
|
||||
yp_parser_t parser;
|
||||
yp_parser_init(&parser, source->source, source->size, filepath);
|
||||
yp_parser_init(&parser, input->source, input->size, filepath);
|
||||
|
||||
yp_node_t *node = yp_parse(&parser);
|
||||
|
||||
yp_buffer_t buffer;
|
||||
if (!yp_buffer_init(&buffer)) rb_raise(rb_eNoMemError, "failed to allocate memory");
|
||||
|
||||
yp_serialize(&parser, node, &buffer);
|
||||
VALUE dumped = rb_str_new(buffer.value, buffer.length);
|
||||
|
||||
VALUE result = rb_str_new(buffer.value, buffer.length);
|
||||
yp_node_destroy(&parser, node);
|
||||
yp_buffer_free(&buffer);
|
||||
yp_parser_free(&parser);
|
||||
|
||||
return dumped;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Dump the AST corresponding to the given string to a string.
|
||||
static VALUE
|
||||
dump(VALUE self, VALUE string, VALUE filepath) {
|
||||
source_t source;
|
||||
source_string_load(&source, string);
|
||||
char *str = NULL;
|
||||
dump(int argc, VALUE *argv, VALUE self) {
|
||||
VALUE string;
|
||||
VALUE filepath;
|
||||
rb_scan_args(argc, argv, "11", &string, &filepath);
|
||||
|
||||
if (filepath != Qnil) {
|
||||
str = StringValueCStr(filepath);
|
||||
}
|
||||
|
||||
return dump_source(&source, str);
|
||||
input_t input;
|
||||
input_load_string(&input, string);
|
||||
return dump_input(&input, check_filepath(filepath));
|
||||
}
|
||||
|
||||
// Dump the AST corresponding to the given file to a string.
|
||||
static VALUE
|
||||
dump_file(VALUE self, VALUE filepath) {
|
||||
source_t source;
|
||||
if (source_file_load(&source, filepath) != 0) return Qnil;
|
||||
input_t input;
|
||||
|
||||
const char *checked = check_filepath(filepath);
|
||||
if (input_load_filepath(&input, checked) != 0) return Qnil;
|
||||
|
||||
VALUE value = dump_input(&input, checked);
|
||||
input_unload_filepath(&input);
|
||||
|
||||
VALUE value = dump_source(&source, StringValueCStr(filepath));
|
||||
source_file_unload(&source);
|
||||
return value;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Extracting values for the parse result */
|
||||
/******************************************************************************/
|
||||
|
||||
// Extract the comments out of the parser into an array.
|
||||
static VALUE
|
||||
parser_comments(yp_parser_t *parser) {
|
||||
parser_comments(yp_parser_t *parser, VALUE source) {
|
||||
VALUE comments = rb_ary_new();
|
||||
yp_comment_t *comment;
|
||||
|
||||
for (comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
|
||||
VALUE location_argv[] = { LONG2FIX(comment->start - parser->start), LONG2FIX(comment->end - parser->start) };
|
||||
for (yp_comment_t *comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
|
||||
VALUE location_argv[] = {
|
||||
source,
|
||||
LONG2FIX(comment->start - parser->start),
|
||||
LONG2FIX(comment->end - parser->start)
|
||||
};
|
||||
|
||||
VALUE type;
|
||||
|
||||
switch (comment->type) {
|
||||
case YP_COMMENT_INLINE:
|
||||
type = ID2SYM(rb_intern("inline"));
|
||||
@ -196,7 +263,7 @@ parser_comments(yp_parser_t *parser) {
|
||||
break;
|
||||
}
|
||||
|
||||
VALUE comment_argv[] = { type, rb_class_new_instance(2, location_argv, rb_cYARPLocation) };
|
||||
VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cYARPLocation) };
|
||||
rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment));
|
||||
}
|
||||
|
||||
@ -205,19 +272,20 @@ parser_comments(yp_parser_t *parser) {
|
||||
|
||||
// Extract the errors out of the parser into an array.
|
||||
static VALUE
|
||||
parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
|
||||
parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
||||
VALUE errors = rb_ary_new();
|
||||
yp_diagnostic_t *error;
|
||||
|
||||
for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) {
|
||||
VALUE location_argv[] = {
|
||||
source,
|
||||
LONG2FIX(error->start - parser->start),
|
||||
LONG2FIX(error->end - parser->start)
|
||||
};
|
||||
|
||||
VALUE error_argv[] = {
|
||||
rb_enc_str_new_cstr(error->message, encoding),
|
||||
rb_class_new_instance(2, location_argv, rb_cYARPLocation)
|
||||
rb_class_new_instance(3, location_argv, rb_cYARPLocation)
|
||||
};
|
||||
|
||||
rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError));
|
||||
@ -228,19 +296,20 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
|
||||
|
||||
// Extract the warnings out of the parser into an array.
|
||||
static VALUE
|
||||
parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
|
||||
parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
|
||||
VALUE warnings = rb_ary_new();
|
||||
yp_diagnostic_t *warning;
|
||||
|
||||
for (warning = (yp_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (yp_diagnostic_t *) warning->node.next) {
|
||||
VALUE location_argv[] = {
|
||||
source,
|
||||
LONG2FIX(warning->start - parser->start),
|
||||
LONG2FIX(warning->end - parser->start)
|
||||
};
|
||||
|
||||
VALUE warning_argv[] = {
|
||||
rb_enc_str_new_cstr(warning->message, encoding),
|
||||
rb_class_new_instance(2, location_argv, rb_cYARPLocation)
|
||||
rb_class_new_instance(3, location_argv, rb_cYARPLocation)
|
||||
};
|
||||
|
||||
rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning));
|
||||
@ -249,22 +318,36 @@ parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
|
||||
return warnings;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Lexing Ruby code */
|
||||
/******************************************************************************/
|
||||
|
||||
// This struct gets stored in the parser and passed in to the lex callback any
|
||||
// time a new token is found. We use it to store the necessary information to
|
||||
// initialize a Token instance.
|
||||
typedef struct {
|
||||
VALUE source;
|
||||
VALUE tokens;
|
||||
rb_encoding *encoding;
|
||||
} lex_data_t;
|
||||
|
||||
// This is passed as a callback to the parser. It gets called every time a new
|
||||
// token is found. Once found, we initialize a new instance of Token and push it
|
||||
// onto the tokens array.
|
||||
static void
|
||||
lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
|
||||
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
|
||||
|
||||
VALUE yields = rb_ary_new_capa(2);
|
||||
rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding));
|
||||
rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
|
||||
rb_ary_push(yields, INT2FIX(parser->lex_state));
|
||||
|
||||
rb_ary_push(lex_data->tokens, yields);
|
||||
}
|
||||
|
||||
// This is called whenever the encoding changes based on the magic comment at
|
||||
// the top of the file. We use it to update the encoding that we are using to
|
||||
// create tokens.
|
||||
static void
|
||||
lex_encoding_changed_callback(yp_parser_t *parser) {
|
||||
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
|
||||
@ -273,30 +356,42 @@ lex_encoding_changed_callback(yp_parser_t *parser) {
|
||||
|
||||
// Return an array of tokens corresponding to the given source.
|
||||
static VALUE
|
||||
lex_source(source_t *source, char *filepath) {
|
||||
lex_input(input_t *input, const char *filepath) {
|
||||
yp_parser_t parser;
|
||||
yp_parser_init(&parser, source->source, source->size, filepath);
|
||||
yp_parser_init(&parser, input->source, input->size, filepath);
|
||||
yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
|
||||
|
||||
VALUE offsets = rb_ary_new();
|
||||
VALUE source_argv[] = { rb_str_new(input->source, input->size), offsets };
|
||||
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
|
||||
|
||||
lex_data_t lex_data = {
|
||||
.source = source,
|
||||
.tokens = rb_ary_new(),
|
||||
.encoding = rb_utf8_encoding()
|
||||
};
|
||||
|
||||
void *data = (void *) &lex_data;
|
||||
lex_data_t *data = &lex_data;
|
||||
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
|
||||
.data = data,
|
||||
.data = (void *) data,
|
||||
.callback = lex_token,
|
||||
};
|
||||
|
||||
parser.lex_callback = &lex_callback;
|
||||
yp_node_t *node = yp_parse(&parser);
|
||||
|
||||
// Here we need to update the source range to have the correct newline
|
||||
// offsets. We do it here because we've already created the object and given
|
||||
// it over to all of the tokens.
|
||||
for (size_t index = 0; index < parser.newline_list.size; index++) {
|
||||
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
|
||||
}
|
||||
|
||||
VALUE result_argv[] = {
|
||||
lex_data.tokens,
|
||||
parser_comments(&parser),
|
||||
parser_errors(&parser, lex_data.encoding),
|
||||
parser_warnings(&parser, lex_data.encoding)
|
||||
parser_comments(&parser, source),
|
||||
parser_errors(&parser, lex_data.encoding, source),
|
||||
parser_warnings(&parser, lex_data.encoding, source)
|
||||
};
|
||||
|
||||
VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
|
||||
@ -309,40 +404,49 @@ lex_source(source_t *source, char *filepath) {
|
||||
|
||||
// Return an array of tokens corresponding to the given string.
|
||||
static VALUE
|
||||
lex(VALUE self, VALUE string, VALUE filepath) {
|
||||
source_t source;
|
||||
source_string_load(&source, string);
|
||||
char *filepath_char = NULL;
|
||||
if (filepath) {
|
||||
filepath_char = StringValueCStr(filepath);
|
||||
}
|
||||
return lex_source(&source, filepath_char);
|
||||
lex(int argc, VALUE *argv, VALUE self) {
|
||||
VALUE string;
|
||||
VALUE filepath;
|
||||
rb_scan_args(argc, argv, "11", &string, &filepath);
|
||||
|
||||
input_t input;
|
||||
input_load_string(&input, string);
|
||||
return lex_input(&input, check_filepath(filepath));
|
||||
}
|
||||
|
||||
// Return an array of tokens corresponding to the given file.
|
||||
static VALUE
|
||||
lex_file(VALUE self, VALUE filepath) {
|
||||
source_t source;
|
||||
if (source_file_load(&source, filepath) != 0) return Qnil;
|
||||
input_t input;
|
||||
|
||||
const char *checked = check_filepath(filepath);
|
||||
if (input_load_filepath(&input, checked) != 0) return Qnil;
|
||||
|
||||
VALUE value = lex_input(&input, checked);
|
||||
input_unload_filepath(&input);
|
||||
|
||||
VALUE value = lex_source(&source, StringValueCStr(filepath));
|
||||
source_file_unload(&source);
|
||||
return value;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Parsing Ruby code */
|
||||
/******************************************************************************/
|
||||
|
||||
// Parse the given input and return a ParseResult instance.
|
||||
static VALUE
|
||||
parse_source(source_t *source, char *filepath) {
|
||||
parse_input(input_t *input, const char *filepath) {
|
||||
yp_parser_t parser;
|
||||
yp_parser_init(&parser, source->source, source->size, filepath);
|
||||
yp_parser_init(&parser, input->source, input->size, filepath);
|
||||
|
||||
yp_node_t *node = yp_parse(&parser);
|
||||
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
|
||||
|
||||
VALUE source = yp_source_new(&parser);
|
||||
VALUE result_argv[] = {
|
||||
yp_ast_new(&parser, node, encoding),
|
||||
parser_comments(&parser),
|
||||
parser_errors(&parser, encoding),
|
||||
parser_warnings(&parser, encoding)
|
||||
parser_comments(&parser, source),
|
||||
parser_errors(&parser, encoding, source),
|
||||
parser_warnings(&parser, encoding, source)
|
||||
};
|
||||
|
||||
VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
|
||||
@ -353,40 +457,58 @@ parse_source(source_t *source, char *filepath) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Parse the given string and return a ParseResult instance.
|
||||
static VALUE
|
||||
parse(VALUE self, VALUE string, VALUE filepath) {
|
||||
source_t source;
|
||||
source_string_load(&source, string);
|
||||
parse(int argc, VALUE *argv, VALUE self) {
|
||||
VALUE string;
|
||||
VALUE filepath;
|
||||
rb_scan_args(argc, argv, "11", &string, &filepath);
|
||||
|
||||
input_t input;
|
||||
input_load_string(&input, string);
|
||||
|
||||
#ifdef YARP_DEBUG_MODE_BUILD
|
||||
char* dup = malloc(source.size);
|
||||
memcpy(dup, source.source, source.size);
|
||||
source.source = dup;
|
||||
char* dup = malloc(input.size);
|
||||
memcpy(dup, input.source, input.size);
|
||||
input.source = dup;
|
||||
#endif
|
||||
VALUE value = parse_source(&source, NIL_P(filepath) ? NULL : StringValueCStr(filepath));
|
||||
|
||||
VALUE value = parse_input(&input, check_filepath(filepath));
|
||||
|
||||
#ifdef YARP_DEBUG_MODE_BUILD
|
||||
free(dup);
|
||||
#endif
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
// Parse the given file and return a ParseResult instance.
|
||||
static VALUE
|
||||
parse_file(VALUE self, VALUE rb_filepath) {
|
||||
source_t source;
|
||||
if (source_file_load(&source, rb_filepath) != 0) {
|
||||
return Qnil;
|
||||
}
|
||||
parse_file(VALUE self, VALUE filepath) {
|
||||
input_t input;
|
||||
|
||||
const char *checked = check_filepath(filepath);
|
||||
if (input_load_filepath(&input, checked) != 0) return Qnil;
|
||||
|
||||
VALUE value = parse_input(&input, checked);
|
||||
input_unload_filepath(&input);
|
||||
|
||||
VALUE value = parse_source(&source, StringValueCStr(rb_filepath));
|
||||
source_file_unload(&source);
|
||||
return value;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Utility functions exposed to make testing easier */
|
||||
/******************************************************************************/
|
||||
|
||||
// Returns an array of strings corresponding to the named capture groups in the
|
||||
// given source string. If YARP was unable to parse the regular expression, this
|
||||
// function returns nil.
|
||||
static VALUE
|
||||
named_captures(VALUE self, VALUE rb_source) {
|
||||
named_captures(VALUE self, VALUE source) {
|
||||
yp_string_list_t string_list;
|
||||
yp_string_list_init(&string_list);
|
||||
|
||||
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(rb_source), RSTRING_LEN(rb_source), &string_list)) {
|
||||
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) {
|
||||
yp_string_list_free(&string_list);
|
||||
return Qnil;
|
||||
}
|
||||
@ -401,6 +523,8 @@ named_captures(VALUE self, VALUE rb_source) {
|
||||
return names;
|
||||
}
|
||||
|
||||
// Accepts a source string and a type of unescaping and returns the unescaped
|
||||
// version.
|
||||
static VALUE
|
||||
unescape(VALUE source, yp_unescape_type_t unescape_type) {
|
||||
yp_string_t string;
|
||||
@ -409,7 +533,13 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
|
||||
yp_list_t error_list;
|
||||
yp_list_init(&error_list);
|
||||
|
||||
yp_unescape_manipulate_string(RSTRING_PTR(source), RSTRING_LEN(source), &string, unescape_type, &error_list);
|
||||
const char *start = RSTRING_PTR(source);
|
||||
size_t length = RSTRING_LEN(source);
|
||||
|
||||
yp_parser_t parser;
|
||||
yp_parser_init(&parser, start, length, "");
|
||||
|
||||
yp_unescape_manipulate_string(&parser, start, length, &string, unescape_type, &error_list);
|
||||
if (yp_list_empty_p(&error_list)) {
|
||||
result = rb_str_new(yp_string_source(&string), yp_string_length(&string));
|
||||
} else {
|
||||
@ -418,27 +548,32 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
|
||||
|
||||
yp_string_free(&string);
|
||||
yp_list_free(&error_list);
|
||||
yp_parser_free(&parser);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Do not unescape anything in the given string. This is here to provide a
|
||||
// consistent API.
|
||||
static VALUE
|
||||
unescape_none(VALUE self, VALUE source) {
|
||||
return unescape(source, YP_UNESCAPE_NONE);
|
||||
}
|
||||
|
||||
// Minimally unescape the given string. This means effectively unescaping just
|
||||
// the quotes of a string. Returns the unescaped string.
|
||||
static VALUE
|
||||
unescape_minimal(VALUE self, VALUE source) {
|
||||
return unescape(source, YP_UNESCAPE_MINIMAL);
|
||||
}
|
||||
|
||||
// Unescape everything in the given string. Return the unescaped string.
|
||||
static VALUE
|
||||
unescape_all(VALUE self, VALUE source) {
|
||||
return unescape(source, YP_UNESCAPE_ALL);
|
||||
}
|
||||
|
||||
// This function returns a hash of information about the given source string's
|
||||
// memory usage.
|
||||
// Return a hash of information about the given source string's memory usage.
|
||||
static VALUE
|
||||
memsize(VALUE self, VALUE string) {
|
||||
yp_parser_t parser;
|
||||
@ -459,28 +594,17 @@ memsize(VALUE self, VALUE string) {
|
||||
return result;
|
||||
}
|
||||
|
||||
static VALUE
|
||||
compile(VALUE self, VALUE string) {
|
||||
yp_parser_t parser;
|
||||
size_t length = RSTRING_LEN(string);
|
||||
yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
|
||||
|
||||
yp_node_t *node = yp_parse(&parser);
|
||||
VALUE result = yp_compile(node);
|
||||
|
||||
yp_node_destroy(&parser, node);
|
||||
yp_parser_free(&parser);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Parse the file, but do nothing with the result. This is used to profile the
|
||||
// parser for memory and speed.
|
||||
static VALUE
|
||||
profile_file(VALUE self, VALUE filepath) {
|
||||
source_t source;
|
||||
if (source_file_load(&source, filepath) != 0) return Qnil;
|
||||
input_t input;
|
||||
|
||||
const char *checked = check_filepath(filepath);
|
||||
if (input_load_filepath(&input, checked) != 0) return Qnil;
|
||||
|
||||
yp_parser_t parser;
|
||||
yp_parser_init(&parser, source.source, source.size, StringValueCStr(filepath));
|
||||
yp_parser_init(&parser, input.source, input.size, checked);
|
||||
|
||||
yp_node_t *node = yp_parse(&parser);
|
||||
yp_node_destroy(&parser, node);
|
||||
@ -491,9 +615,8 @@ profile_file(VALUE self, VALUE filepath) {
|
||||
|
||||
// The function takes a source string and returns a Ruby array containing the
|
||||
// offsets of every newline in the string. (It also includes a 0 at the
|
||||
// beginning to indicate the position of the first line.)
|
||||
//
|
||||
// It accepts a string as its only argument and returns an array of integers.
|
||||
// beginning to indicate the position of the first line.) It accepts a string as
|
||||
// its only argument and returns an array of integers.
|
||||
static VALUE
|
||||
newlines(VALUE self, VALUE string) {
|
||||
yp_parser_t parser;
|
||||
@ -512,46 +635,56 @@ newlines(VALUE self, VALUE string) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Initialization of the extension */
|
||||
/******************************************************************************/
|
||||
|
||||
RUBY_FUNC_EXPORTED void
|
||||
Init_yarp(void) {
|
||||
// Make sure that the YARP library version matches the expected version.
|
||||
// Otherwise something was compiled incorrectly.
|
||||
if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) {
|
||||
rb_raise(rb_eRuntimeError, "The YARP library version (%s) does not match the expected version (%s)", yp_version(),
|
||||
EXPECTED_YARP_VERSION);
|
||||
rb_raise(
|
||||
rb_eRuntimeError,
|
||||
"The YARP library version (%s) does not match the expected version (%s)",
|
||||
yp_version(),
|
||||
EXPECTED_YARP_VERSION
|
||||
);
|
||||
}
|
||||
|
||||
// Grab up references to all of the constants that we're going to need to
|
||||
// reference throughout this extension.
|
||||
rb_cYARP = rb_define_module("YARP");
|
||||
rb_cYARPSource = rb_define_class_under(rb_cYARP, "Source", rb_cObject);
|
||||
rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject);
|
||||
rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject);
|
||||
|
||||
rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject);
|
||||
rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject);
|
||||
rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject);
|
||||
rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject);
|
||||
|
||||
rb_define_const(rb_cYARP, "VERSION", rb_sprintf("%d.%d.%d", YP_VERSION_MAJOR, YP_VERSION_MINOR, YP_VERSION_PATCH));
|
||||
// Define the version string here so that we can use the constants defined
|
||||
// in yarp.h.
|
||||
rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
|
||||
|
||||
rb_define_singleton_method(rb_cYARP, "dump", dump, 2);
|
||||
// First, the functions that have to do with lexing and parsing.
|
||||
rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
|
||||
rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
|
||||
|
||||
rb_define_singleton_method(rb_cYARP, "lex", lex, 2);
|
||||
rb_define_singleton_method(rb_cYARP, "lex", lex, -1);
|
||||
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
|
||||
|
||||
rb_define_singleton_method(rb_cYARP, "_parse", parse, 2);
|
||||
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
|
||||
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
|
||||
|
||||
// Next, the functions that will be called by the parser to perform various
|
||||
// internal tasks. We expose these to make them easier to test.
|
||||
rb_define_singleton_method(rb_cYARP, "named_captures", named_captures, 1);
|
||||
|
||||
rb_define_singleton_method(rb_cYARP, "unescape_none", unescape_none, 1);
|
||||
rb_define_singleton_method(rb_cYARP, "unescape_minimal", unescape_minimal, 1);
|
||||
rb_define_singleton_method(rb_cYARP, "unescape_all", unescape_all, 1);
|
||||
|
||||
rb_define_singleton_method(rb_cYARP, "memsize", memsize, 1);
|
||||
|
||||
rb_define_singleton_method(rb_cYARP, "compile", compile, 1);
|
||||
|
||||
rb_define_singleton_method(rb_cYARP, "profile_file", profile_file, 1);
|
||||
|
||||
rb_define_singleton_method(rb_cYARP, "newlines", newlines, 1);
|
||||
|
||||
// Next, initialize the pack API.
|
||||
Init_yarp_pack();
|
||||
}
|
||||
|
@ -5,11 +5,11 @@
|
||||
#include <ruby/encoding.h>
|
||||
#include "yarp.h"
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
// The following headers are necessary to read files using demand paging.
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
@ -17,16 +17,11 @@
|
||||
|
||||
#define EXPECTED_YARP_VERSION "0.4.0"
|
||||
|
||||
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding);
|
||||
|
||||
VALUE yp_source_new(yp_parser_t *parser);
|
||||
VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
|
||||
VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
|
||||
|
||||
VALUE yp_compile(yp_node_t *node);
|
||||
|
||||
void Init_yarp_pack(void);
|
||||
|
||||
YP_EXPORTED_FUNCTION void Init_yarp(void);
|
||||
|
||||
#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
|
||||
|
||||
#endif // YARP_EXT_NODE_H
|
||||
#endif
|
||||
|
@ -1,20 +0,0 @@
|
||||
#ifndef YARP_MISSING_H
|
||||
#define YARP_MISSING_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
const char * yp_strnstr(const char *haystack, const char *needle, size_t length);
|
||||
|
||||
int yp_strncasecmp(const char *string1, const char *string2, size_t length);
|
||||
|
||||
#ifndef HAVE_STRNCASECMP
|
||||
#ifndef strncasecmp
|
||||
#define strncasecmp yp_strncasecmp
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
16
yarp/node.h
16
yarp/node.h
@ -2,8 +2,6 @@
|
||||
#define YARP_NODE_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include "yarp.h"
|
||||
#include "yarp/parser.h"
|
||||
|
||||
// Append a token to the given list.
|
||||
@ -15,6 +13,20 @@ void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
|
||||
// Clear the node but preserves the location.
|
||||
void yp_node_clear(yp_node_t *node);
|
||||
|
||||
// Deallocate a node and all of its children.
|
||||
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
|
||||
|
||||
// This struct stores the information gathered by the yp_node_memsize function.
|
||||
// It contains both the memory footprint and additionally metadata about the
|
||||
// shape of the tree.
|
||||
typedef struct {
|
||||
size_t memsize;
|
||||
size_t node_count;
|
||||
} yp_memsize_t;
|
||||
|
||||
// Calculates the memory footprint of a given node.
|
||||
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
|
||||
|
||||
#define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
|
||||
#define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })
|
||||
|
||||
|
@ -3,8 +3,8 @@
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef enum yp_pack_version {
|
||||
YP_PACK_VERSION_3_2_0
|
||||
|
@ -1,17 +1,16 @@
|
||||
#ifndef YARP_PARSER_H
|
||||
#define YARP_PARSER_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "yarp/ast.h"
|
||||
#include "yarp/defines.h"
|
||||
#include "yarp/enc/yp_encoding.h"
|
||||
#include "yarp/util/yp_constant_pool.h"
|
||||
#include "yarp/util/yp_list.h"
|
||||
#include "yarp/util/yp_newline_list.h"
|
||||
#include "yarp/util/yp_state_stack.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
// This enum provides various bits that represent different kinds of states that
|
||||
// the lexer can track. This is used to determine which kind of token to return
|
||||
// based on the context of the parser.
|
||||
|
@ -5,6 +5,8 @@
|
||||
/* if you are looking to modify the */
|
||||
/* template */
|
||||
/******************************************************************************/
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "yarp/ast.h"
|
||||
@ -14,7 +16,7 @@
|
||||
static void
|
||||
prettyprint_location(yp_buffer_t *buffer, yp_parser_t *parser, yp_location_t *location) {
|
||||
char printed[] = "[0000-0000]";
|
||||
sprintf(printed, "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
|
||||
yp_snprintf(printed, sizeof(printed), "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
|
||||
yp_buffer_append_str(buffer, printed, strlen(printed));
|
||||
}
|
||||
|
||||
@ -189,7 +191,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
for (uint32_t index = 0; index < ((yp_block_node_t *)node)->locals.size; index++) {
|
||||
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
|
||||
char locals_buffer[12];
|
||||
sprintf(locals_buffer, "%u", ((yp_block_node_t *)node)->locals.ids[index]);
|
||||
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_block_node_t *)node)->locals.ids[index]);
|
||||
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); if (((yp_block_node_t *)node)->parameters == NULL) {
|
||||
@ -291,7 +293,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_node_t *)node)->block);
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
|
||||
sprintf(flags_buffer, "+%d", ((yp_call_node_t *)node)->flags);
|
||||
yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_call_node_t *)node)->flags);
|
||||
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
|
||||
yp_buffer_append_str(buffer, ", ", 2); yp_buffer_append_str(buffer, "\"", 1);
|
||||
yp_buffer_append_str(buffer, yp_string_source(&((yp_call_node_t *)node)->name), yp_string_length(&((yp_call_node_t *)node)->name));
|
||||
@ -321,7 +323,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_call_operator_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_operator_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char operator_id_buffer[12];
|
||||
sprintf(operator_id_buffer, "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
|
||||
yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
|
||||
yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -360,7 +362,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
for (uint32_t index = 0; index < ((yp_class_node_t *)node)->locals.size; index++) {
|
||||
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
|
||||
char locals_buffer[12];
|
||||
sprintf(locals_buffer, "%u", ((yp_class_node_t *)node)->locals.ids[index]);
|
||||
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_class_node_t *)node)->locals.ids[index]);
|
||||
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_class_node_t *)node)->class_keyword_loc);
|
||||
@ -406,7 +408,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_class_variable_operator_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_class_variable_operator_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
|
||||
sprintf(operator_buffer, "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
|
||||
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
|
||||
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -454,7 +456,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_constant_operator_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_operator_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
|
||||
sprintf(operator_buffer, "%u", ((yp_constant_operator_write_node_t *)node)->operator);
|
||||
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_operator_write_node_t *)node)->operator);
|
||||
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -493,7 +495,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_constant_path_operator_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_path_operator_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
|
||||
sprintf(operator_buffer, "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
|
||||
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
|
||||
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -540,7 +542,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); for (uint32_t index = 0; index < ((yp_def_node_t *)node)->locals.size; index++) {
|
||||
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
|
||||
char locals_buffer[12];
|
||||
sprintf(locals_buffer, "%u", ((yp_def_node_t *)node)->locals.ids[index]);
|
||||
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_def_node_t *)node)->locals.ids[index]);
|
||||
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_def_node_t *)node)->def_keyword_loc);
|
||||
@ -734,7 +736,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_global_variable_operator_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_global_variable_operator_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
|
||||
sprintf(operator_buffer, "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
|
||||
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
|
||||
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -871,7 +873,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_instance_variable_operator_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_instance_variable_operator_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char operator_buffer[12];
|
||||
sprintf(operator_buffer, "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
|
||||
yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
|
||||
yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -911,7 +913,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_interpolated_regular_expression_node_t *)node)->closing_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
|
||||
sprintf(flags_buffer, "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
|
||||
yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
|
||||
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -1001,7 +1003,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
for (uint32_t index = 0; index < ((yp_lambda_node_t *)node)->locals.size; index++) {
|
||||
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
|
||||
char locals_buffer[12];
|
||||
sprintf(locals_buffer, "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
|
||||
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
|
||||
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_lambda_node_t *)node)->opening_loc);
|
||||
@ -1024,7 +1026,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_and_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_and_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
|
||||
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
|
||||
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
|
||||
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -1035,7 +1037,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_or_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_or_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
|
||||
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
|
||||
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
|
||||
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -1046,10 +1048,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_local_variable_operator_write_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_write_node_t *)node)->value);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char constant_id_buffer[12];
|
||||
sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
|
||||
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
|
||||
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
|
||||
yp_buffer_append_str(buffer, ", ", 2); char operator_id_buffer[12];
|
||||
sprintf(operator_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
|
||||
yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
|
||||
yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -1057,10 +1059,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
|
||||
yp_buffer_append_str(buffer, "LocalVariableReadNode(", 22);
|
||||
char constant_id_buffer[12];
|
||||
sprintf(constant_id_buffer, "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
|
||||
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
|
||||
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
|
||||
yp_buffer_append_str(buffer, ", ", 2); char depth_buffer[12];
|
||||
sprintf(depth_buffer, "+%d", ((yp_local_variable_read_node_t *)node)->depth);
|
||||
yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_read_node_t *)node)->depth);
|
||||
yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -1068,10 +1070,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
|
||||
yp_buffer_append_str(buffer, "LocalVariableWriteNode(", 23);
|
||||
char constant_id_buffer[12];
|
||||
sprintf(constant_id_buffer, "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
|
||||
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
|
||||
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
|
||||
yp_buffer_append_str(buffer, ", ", 2); char depth_buffer[12];
|
||||
sprintf(depth_buffer, "+%d", ((yp_local_variable_write_node_t *)node)->depth);
|
||||
yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_write_node_t *)node)->depth);
|
||||
yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
|
||||
yp_buffer_append_str(buffer, ", ", 2); if (((yp_local_variable_write_node_t *)node)->value == NULL) {
|
||||
yp_buffer_append_str(buffer, "nil", 3);
|
||||
@ -1113,7 +1115,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
for (uint32_t index = 0; index < ((yp_module_node_t *)node)->locals.size; index++) {
|
||||
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
|
||||
char locals_buffer[12];
|
||||
sprintf(locals_buffer, "%u", ((yp_module_node_t *)node)->locals.ids[index]);
|
||||
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_module_node_t *)node)->locals.ids[index]);
|
||||
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_module_node_t *)node)->module_keyword_loc);
|
||||
@ -1187,7 +1189,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
case YP_NODE_OPTIONAL_PARAMETER_NODE: {
|
||||
yp_buffer_append_str(buffer, "OptionalParameterNode(", 22);
|
||||
char constant_id_buffer[12];
|
||||
sprintf(constant_id_buffer, "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
|
||||
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
|
||||
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->name_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->operator_loc);
|
||||
@ -1298,7 +1300,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
for (uint32_t index = 0; index < ((yp_program_node_t *)node)->locals.size; index++) {
|
||||
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
|
||||
char locals_buffer[12];
|
||||
sprintf(locals_buffer, "%u", ((yp_program_node_t *)node)->locals.ids[index]);
|
||||
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_program_node_t *)node)->locals.ids[index]);
|
||||
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_node(buffer, parser, (yp_node_t *)((yp_program_node_t *)node)->statements);
|
||||
@ -1319,7 +1321,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_range_node_t *)node)->operator_loc);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
|
||||
sprintf(flags_buffer, "+%d", ((yp_range_node_t *)node)->flags);
|
||||
yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_range_node_t *)node)->flags);
|
||||
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -1344,7 +1346,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
yp_buffer_append_str(buffer, yp_string_source(&((yp_regular_expression_node_t *)node)->unescaped), yp_string_length(&((yp_regular_expression_node_t *)node)->unescaped));
|
||||
yp_buffer_append_str(buffer, "\"", 1);
|
||||
yp_buffer_append_str(buffer, ", ", 2); char flags_buffer[12];
|
||||
sprintf(flags_buffer, "+%d", ((yp_regular_expression_node_t *)node)->flags);
|
||||
yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_regular_expression_node_t *)node)->flags);
|
||||
yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -1363,7 +1365,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
case YP_NODE_REQUIRED_PARAMETER_NODE: {
|
||||
yp_buffer_append_str(buffer, "RequiredParameterNode(", 22);
|
||||
char constant_id_buffer[12];
|
||||
sprintf(constant_id_buffer, "%u", ((yp_required_parameter_node_t *)node)->constant_id);
|
||||
yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_required_parameter_node_t *)node)->constant_id);
|
||||
yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
|
||||
yp_buffer_append_str(buffer, ")", 1);
|
||||
break;
|
||||
@ -1443,7 +1445,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
|
||||
for (uint32_t index = 0; index < ((yp_singleton_class_node_t *)node)->locals.size; index++) {
|
||||
if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
|
||||
char locals_buffer[12];
|
||||
sprintf(locals_buffer, "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
|
||||
yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
|
||||
yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
|
||||
}
|
||||
yp_buffer_append_str(buffer, ", ", 2); prettyprint_location(buffer, parser, &((yp_singleton_class_node_t *)node)->class_keyword_loc);
|
||||
|
@ -374,7 +374,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
|
||||
case '#': { // inline comments
|
||||
bool found = yp_regexp_char_find(parser, ')');
|
||||
// the close paren we found is escaped, we need to find another
|
||||
while (parser->start <= parser->cursor - 2 && *(parser->cursor - 2) == '\\') {
|
||||
while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
|
||||
found = yp_regexp_char_find(parser, ')');
|
||||
}
|
||||
return found;
|
||||
|
@ -2,15 +2,14 @@
|
||||
#define YARP_REGEXP_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include "yarp/parser.h"
|
||||
#include "yarp/util/yp_string_list.h"
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "yarp/util/yp_string_list.h"
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
// Parse a regular expression and extract the names of all of the named capture
|
||||
// groups.
|
||||
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures);
|
||||
|
@ -438,14 +438,14 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
||||
//
|
||||
YP_EXPORTED_FUNCTION void
|
||||
yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
|
||||
yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
|
||||
if (unescape_type == YP_UNESCAPE_NONE) {
|
||||
// If we're not unescaping then we can reference the source directly.
|
||||
yp_string_shared_init(string, value, value + length);
|
||||
return;
|
||||
}
|
||||
|
||||
const char *backslash = memchr(value, '\\', length);
|
||||
const char *backslash = yp_memchr(parser, value, '\\', length);
|
||||
|
||||
if (backslash == NULL) {
|
||||
// Here there are no escapes, so we can reference the source directly.
|
||||
@ -509,7 +509,7 @@ yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *str
|
||||
}
|
||||
|
||||
if (end > cursor) {
|
||||
backslash = memchr(cursor, '\\', (size_t) (end - cursor));
|
||||
backslash = yp_memchr(parser, cursor, '\\', (size_t) (end - cursor));
|
||||
} else {
|
||||
backslash = NULL;
|
||||
}
|
||||
|
@ -2,17 +2,18 @@
|
||||
#define YARP_UNESCAPE_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
#include "yarp/diagnostic.h"
|
||||
#include "yarp/parser.h"
|
||||
#include "yarp/util/yp_char.h"
|
||||
#include "yarp/util/yp_list.h"
|
||||
#include "yarp/util/yp_memchr.h"
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "yarp/diagnostic.h"
|
||||
#include "yarp/util/yp_char.h"
|
||||
#include "yarp/util/yp_list.h"
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
// The type of unescape we are performing.
|
||||
typedef enum {
|
||||
// When we're creating a string inside of a list literal like %w, we
|
||||
@ -30,7 +31,7 @@ typedef enum {
|
||||
|
||||
// Unescape the contents of the given token into the given string using the
|
||||
// given unescape mode.
|
||||
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
|
||||
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
|
||||
|
||||
YP_EXPORTED_FUNCTION size_t yp_unescape_calculate_difference(const char *value, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list);
|
||||
|
||||
|
@ -2,12 +2,11 @@
|
||||
#define YP_CHAR_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
#include "yarp/util/yp_newline_list.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "yarp/util/yp_newline_list.h"
|
||||
|
||||
// Returns the number of characters at the start of the string that are
|
||||
// whitespace. Disallows searching past the given maximum number of characters.
|
||||
size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);
|
||||
|
@ -6,13 +6,13 @@
|
||||
#ifndef YP_CONSTANT_POOL_H
|
||||
#define YP_CONSTANT_POOL_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
typedef uint32_t yp_constant_id_t;
|
||||
|
||||
typedef struct {
|
||||
|
31
yarp/util/yp_memchr.c
Normal file
31
yarp/util/yp_memchr.c
Normal file
@ -0,0 +1,31 @@
|
||||
#include "yarp/util/yp_memchr.h"
|
||||
|
||||
#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
|
||||
|
||||
// We need to roll our own memchr to handle cases where the encoding changes and
|
||||
// we need to search for a character in a buffer that could be the trailing byte
|
||||
// of a multibyte character.
|
||||
void *
|
||||
yp_memchr(yp_parser_t *parser, const void *memory, int character, size_t number) {
|
||||
if (parser->encoding_changed && parser->encoding.multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
|
||||
const char *source = (const char *) memory;
|
||||
size_t index = 0;
|
||||
|
||||
while (index < number) {
|
||||
if (source[index] == character) {
|
||||
return (void *) (source + index);
|
||||
}
|
||||
|
||||
size_t width = parser->encoding.char_width(source + index);
|
||||
if (width == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
index += width;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
} else {
|
||||
return memchr(memory, character, number);
|
||||
}
|
||||
}
|
14
yarp/util/yp_memchr.h
Normal file
14
yarp/util/yp_memchr.h
Normal file
@ -0,0 +1,14 @@
|
||||
#ifndef YP_MEMCHR_H
|
||||
#define YP_MEMCHR_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
#include "yarp/parser.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
// We need to roll our own memchr to handle cases where the encoding changes and
|
||||
// we need to search for a character in a buffer that could be the trailing byte
|
||||
// of a multibyte character.
|
||||
void * yp_memchr(yp_parser_t *parser, const void *source, int character, size_t number);
|
||||
|
||||
#endif
|
@ -31,7 +31,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
|
||||
}
|
||||
|
||||
assert(cursor >= list->start);
|
||||
list->offsets[list->size++] = (size_t) (cursor - list->start);
|
||||
list->offsets[list->size++] = (size_t) (cursor - list->start + 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -9,13 +9,13 @@
|
||||
#ifndef YP_NEWLINE_LIST_H
|
||||
#define YP_NEWLINE_LIST_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// A list of offsets of newlines in a string. The offsets are assumed to be
|
||||
// sorted/inserted in ascending order.
|
||||
typedef struct {
|
||||
|
14
yarp/util/yp_snprintf.c
Normal file
14
yarp/util/yp_snprintf.c
Normal file
@ -0,0 +1,14 @@
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#ifndef HAVE_SNPRINTF
|
||||
// In case snprintf isn't present on the system, we provide our own that simply
|
||||
// forwards to the less-safe sprintf.
|
||||
int
|
||||
yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
int result = vsprintf(dest, format, args);
|
||||
va_end(args);
|
||||
return result;
|
||||
}
|
||||
#endif
|
@ -1,11 +1,5 @@
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
// Allocate a new yp_string_t.
|
||||
yp_string_t *
|
||||
yp_string_alloc(void) {
|
||||
return (yp_string_t *) malloc(sizeof(yp_string_t));
|
||||
}
|
||||
|
||||
// Initialize a shared string that is based on initial input.
|
||||
void
|
||||
yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
|
||||
|
@ -29,9 +29,6 @@ typedef struct {
|
||||
} as;
|
||||
} yp_string_t;
|
||||
|
||||
// Allocate a new yp_string_t.
|
||||
yp_string_t * yp_string_alloc(void);
|
||||
|
||||
// Initialize a shared string that is based on initial input.
|
||||
void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);
|
||||
|
||||
|
@ -2,12 +2,11 @@
|
||||
#define YARP_STRING_LIST_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "yarp/util/yp_string.h"
|
||||
|
||||
typedef struct {
|
||||
yp_string_t *strings;
|
||||
size_t length;
|
||||
|
@ -1,19 +1,5 @@
|
||||
#include "yarp/missing.h"
|
||||
|
||||
const char *
|
||||
yp_strnstr(const char *haystack, const char *needle, size_t length) {
|
||||
size_t needle_length = strlen(needle);
|
||||
if (needle_length > length) return NULL;
|
||||
|
||||
const char *haystack_limit = haystack + length - needle_length + 1;
|
||||
|
||||
while ((haystack = memchr(haystack, needle[0], (size_t) (haystack_limit - haystack))) != NULL) {
|
||||
if (!strncmp(haystack, needle, needle_length)) return haystack;
|
||||
haystack++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
#include <ctype.h>
|
||||
#include <stddef.h>
|
||||
|
||||
int
|
||||
yp_strncasecmp(const char *string1, const char *string2, size_t length) {
|
@ -1,5 +1,42 @@
|
||||
#include "yarp/util/yp_strpbrk.h"
|
||||
|
||||
// This is the slow path that does care about the encoding.
|
||||
static inline const char *
|
||||
yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
|
||||
size_t index = 0;
|
||||
|
||||
while (index < maximum) {
|
||||
if (strchr(charset, source[index]) != NULL) {
|
||||
return source + index;
|
||||
}
|
||||
|
||||
size_t width = parser->encoding.char_width(source + index);
|
||||
if (width == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
index += width;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The encoding-agnostic fast path. Every one of the first `maximum` bytes of
// source is tested against the charset in turn.
//
// Returns a pointer to the first matching byte, or NULL if none matched. Note
// that strchr also matches the charset's terminating NUL, so a NUL byte in
// source is reported as a match.
static inline const char *
yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
    for (size_t offset = 0; offset != maximum; offset++) {
        const char *candidate = source + offset;
        if (strchr(charset, *candidate) != NULL) return candidate;
    }

    return NULL;
}
|
||||
|
||||
// Here we have rolled our own version of strpbrk. The standard library strpbrk
|
||||
// has undefined behavior when the source string is not null-terminated. We want
|
||||
// to support strings that are not null-terminated because yp_parse does not
|
||||
@ -12,19 +49,18 @@
|
||||
// also don't want it to stop on null bytes. Ruby actually allows null bytes
|
||||
// within strings, comments, regular expressions, etc. So we need to be able to
|
||||
// skip past them.
|
||||
//
|
||||
// Finally, we want to support encodings wherein the charset could contain
|
||||
// characters that are trailing bytes of multi-byte characters. For example, in
|
||||
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
||||
// need to take a slower path and iterate one multi-byte character at a time.
|
||||
const char *
|
||||
yp_strpbrk(const char *source, const char *charset, ptrdiff_t length) {
|
||||
if (length < 0) return NULL;
|
||||
|
||||
size_t index = 0;
|
||||
size_t maximum = (size_t) length;
|
||||
|
||||
while (index < maximum) {
|
||||
if (strchr(charset, source[index]) != NULL) {
|
||||
return &source[index];
|
||||
}
|
||||
index++;
|
||||
yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
|
||||
if (length <= 0) {
|
||||
return NULL;
|
||||
} else if (parser->encoding_changed && parser->encoding.multibyte) {
|
||||
return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
|
||||
} else {
|
||||
return yp_strpbrk_single_byte(source, charset, (size_t) length);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
#define YP_STRPBRK_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
#include "yarp/parser.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@ -18,6 +19,11 @@
|
||||
// also don't want it to stop on null bytes. Ruby actually allows null bytes
|
||||
// within strings, comments, regular expressions, etc. So we need to be able to
|
||||
// skip past them.
|
||||
const char * yp_strpbrk(const char *source, const char *charset, ptrdiff_t length);
|
||||
//
|
||||
// Finally, we want to support encodings wherein the charset could contain
|
||||
// characters that are trailing bytes of multi-byte characters. For example, in
|
||||
// Shift-JIS, the backslash character can be a trailing byte. In that case we
|
||||
// need to take a slower path and iterate one multi-byte character at a time.
|
||||
const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);
|
||||
|
||||
#endif
|
||||
|
5
yarp/version.h
Normal file
5
yarp/version.h
Normal file
@ -0,0 +1,5 @@
|
||||
#define YP_VERSION_MAJOR 0
|
||||
#define YP_VERSION_MINOR 4
|
||||
#define YP_VERSION_PATCH 0
|
||||
|
||||
#define YP_VERSION "0.4.0"
|
511
yarp/yarp.c
511
yarp/yarp.c
@ -1,16 +1,19 @@
|
||||
#include "yarp.h"
|
||||
#include "yarp/version.h"
|
||||
|
||||
#define YP_STRINGIZE0(expr) #expr
|
||||
#define YP_STRINGIZE(expr) YP_STRINGIZE0(expr)
|
||||
#define YP_VERSION_MACRO YP_STRINGIZE(YP_VERSION_MAJOR) "." YP_STRINGIZE(YP_VERSION_MINOR) "." YP_STRINGIZE(YP_VERSION_PATCH)
|
||||
|
||||
#define YP_TAB_WHITESPACE_SIZE 8
|
||||
|
||||
// The YARP version and the serialization format.
|
||||
const char *
|
||||
yp_version(void) {
|
||||
return YP_VERSION_MACRO;
|
||||
return YP_VERSION;
|
||||
}
|
||||
|
||||
// In heredocs, tabs automatically complete up to the next 8 spaces. This is
|
||||
// defined in CRuby as TAB_WIDTH.
|
||||
#define YP_TAB_WHITESPACE_SIZE 8
|
||||
|
||||
// Debugging logging will provide you with additional debugging functions as
|
||||
// well as automatically replace some functions with their debugging
|
||||
// counterparts.
|
||||
#ifndef YP_DEBUG_LOGGING
|
||||
#define YP_DEBUG_LOGGING 0
|
||||
#endif
|
||||
@ -442,6 +445,7 @@ not_provided(yp_parser_t *parser) {
|
||||
return (yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
|
||||
}
|
||||
|
||||
#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_SHARED, .as.shared.start = NULL, .as.shared.end = NULL })
|
||||
#define YP_LOCATION_NULL_VALUE(parser) ((yp_location_t) { .start = parser->start, .end = parser->start })
|
||||
#define YP_LOCATION_TOKEN_VALUE(token) ((yp_location_t) { .start = (token)->start, .end = (token)->end })
|
||||
#define YP_LOCATION_NODE_VALUE(node) ((yp_location_t) { .start = (node)->location.start, .end = (node)->location.end })
|
||||
@ -675,7 +679,9 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
|
||||
.constant = NULL,
|
||||
.rest = NULL,
|
||||
.requireds = YP_EMPTY_NODE_LIST,
|
||||
.posts = YP_EMPTY_NODE_LIST
|
||||
.posts = YP_EMPTY_NODE_LIST,
|
||||
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
||||
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
|
||||
};
|
||||
|
||||
// For now we're going to just copy over each pointer manually. This could be
|
||||
@ -684,7 +690,7 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
|
||||
for (size_t index = 0; index < nodes->size; index++) {
|
||||
yp_node_t *child = nodes->nodes[index];
|
||||
|
||||
if (child->type == YP_NODE_SPLAT_NODE) {
|
||||
if (!found_rest && child->type == YP_NODE_SPLAT_NODE) {
|
||||
node->rest = child;
|
||||
found_rest = true;
|
||||
} else if (found_rest) {
|
||||
@ -710,7 +716,9 @@ yp_array_pattern_node_rest_create(yp_parser_t *parser, yp_node_t *rest) {
|
||||
.constant = NULL,
|
||||
.rest = rest,
|
||||
.requireds = YP_EMPTY_NODE_LIST,
|
||||
.posts = YP_EMPTY_NODE_LIST
|
||||
.posts = YP_EMPTY_NODE_LIST,
|
||||
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
||||
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
|
||||
};
|
||||
|
||||
return node;
|
||||
@ -1885,7 +1893,9 @@ yp_find_pattern_node_create(yp_parser_t *parser, yp_node_list_t *nodes) {
|
||||
.constant = NULL,
|
||||
.left = left,
|
||||
.right = right,
|
||||
.requireds = YP_EMPTY_NODE_LIST
|
||||
.requireds = YP_EMPTY_NODE_LIST,
|
||||
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
||||
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
|
||||
};
|
||||
|
||||
// For now we're going to just copy over each pointer manually. This could be
|
||||
@ -2018,7 +2028,9 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
|
||||
},
|
||||
.constant = NULL,
|
||||
.kwrest = NULL,
|
||||
.assocs = YP_EMPTY_NODE_LIST
|
||||
.assocs = YP_EMPTY_NODE_LIST,
|
||||
.opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
|
||||
.closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
|
||||
};
|
||||
|
||||
for (size_t index = 0; index < assocs->size; index++) {
|
||||
@ -3709,7 +3721,8 @@ yp_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
|
||||
},
|
||||
.opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
|
||||
.content_loc = YP_LOCATION_TOKEN_VALUE(content),
|
||||
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
|
||||
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
||||
.unescaped = YP_EMPTY_STRING
|
||||
};
|
||||
|
||||
return node;
|
||||
@ -3766,7 +3779,8 @@ yp_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
|
||||
},
|
||||
.opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
|
||||
.value_loc = YP_LOCATION_TOKEN_VALUE(value),
|
||||
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
|
||||
.closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
|
||||
.unescaped = YP_EMPTY_STRING
|
||||
};
|
||||
|
||||
return node;
|
||||
@ -3788,7 +3802,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
|
||||
ptrdiff_t length = label.end - label.start;
|
||||
assert(length >= 0);
|
||||
|
||||
yp_unescape_manipulate_string(label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
|
||||
yp_unescape_manipulate_string(parser, label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
|
||||
break;
|
||||
}
|
||||
case YP_TOKEN_MISSING: {
|
||||
@ -4073,7 +4087,8 @@ yp_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_
|
||||
},
|
||||
.opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
|
||||
.content_loc = YP_LOCATION_TOKEN_VALUE(content),
|
||||
.closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
|
||||
.closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
|
||||
.unescaped = YP_EMPTY_STRING
|
||||
};
|
||||
|
||||
return node;
|
||||
@ -4113,6 +4128,7 @@ yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_lo
|
||||
}
|
||||
|
||||
|
||||
#undef YP_EMPTY_STRING
|
||||
#undef YP_LOCATION_NULL_VALUE
|
||||
#undef YP_LOCATION_TOKEN_VALUE
|
||||
#undef YP_LOCATION_NODE_VALUE
|
||||
@ -4331,6 +4347,17 @@ peek(yp_parser_t *parser) {
|
||||
}
|
||||
}
|
||||
|
||||
// Get the next string of length len in the source starting from parser->current.end.
|
||||
// If the string extends beyond the end of the source, return the empty string ""
|
||||
static inline const char*
|
||||
peek_string(yp_parser_t *parser, size_t len) {
|
||||
if (parser->current.end + len <= parser->end) {
|
||||
return parser->current.end;
|
||||
} else {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// If the character to be read matches the given value, then returns true and
|
||||
// advances the current pointer.
|
||||
static inline bool
|
||||
@ -4342,22 +4369,53 @@ match(yp_parser_t *parser, char value) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the first newline character within the `length` bytes that start at
// cursor. Returns a pointer to it, or NULL if the range contains no newline.
// The length must not be negative.
static inline const char *
next_newline(const char *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A plain byte search with memchr is safe here because none of the
    // encodings that we support use \n as a component of a multi-byte
    // character.
    const void *found = memchr(cursor, '\n', span);
    return (const char *) found;
}
|
||||
|
||||
// Find the start of the encoding comment. This is effectively an inlined
|
||||
// version of strnstr with some modifications.
|
||||
static inline const char *
|
||||
parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
|
||||
assert(remaining >= 0);
|
||||
size_t length = (size_t) remaining;
|
||||
|
||||
size_t key_length = strlen("coding:");
|
||||
if (key_length > length) return NULL;
|
||||
|
||||
const char *cursor_limit = cursor + length - key_length + 1;
|
||||
while ((cursor = yp_memchr(parser, cursor, 'c', (size_t) (cursor_limit - cursor))) != NULL) {
|
||||
if (
|
||||
(strncmp(cursor, "coding", key_length - 1) == 0) &&
|
||||
(cursor[key_length - 1] == ':' || cursor[key_length - 1] == '=')
|
||||
) {
|
||||
return cursor + key_length;
|
||||
}
|
||||
|
||||
cursor++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Here we're going to check if this is a "magic" comment, and perform whatever
|
||||
// actions are necessary for it here.
|
||||
static void
|
||||
parser_lex_encoding_comment(yp_parser_t *parser) {
|
||||
const char *start = parser->current.start + 1;
|
||||
const char *end = memchr(start, '\n', (size_t) (parser->end - start));
|
||||
const char *end = next_newline(start, parser->end - start);
|
||||
if (end == NULL) end = parser->end;
|
||||
|
||||
// These are the patterns we're going to match to find the encoding comment.
|
||||
// This is definitely not complete or even really correct.
|
||||
const char *encoding_start = NULL;
|
||||
if ((encoding_start = yp_strnstr(start, "coding:", (size_t) (end - start))) != NULL) {
|
||||
encoding_start += 7;
|
||||
} else if ((encoding_start = yp_strnstr(start, "coding=", (size_t) (end - start))) != NULL) {
|
||||
encoding_start += 7;
|
||||
}
|
||||
const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
|
||||
|
||||
// If we didn't find anything that matched our patterns, then return. Note
|
||||
// that this does a _very_ poor job of actually finding the encoding, and
|
||||
@ -4370,7 +4428,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
||||
|
||||
// Now determine the end of the encoding string. This is either the end of
|
||||
// the line, the first whitespace character, or a punctuation mark.
|
||||
const char *encoding_end = yp_strpbrk(encoding_start, " \t\f\r\v\n;,", end - encoding_start);
|
||||
const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
|
||||
encoding_end = encoding_end == NULL ? end : encoding_end;
|
||||
|
||||
// Finally, we can determine the width of the encoding string.
|
||||
@ -4392,7 +4450,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
||||
// Extensions like utf-8 can contain extra encoding details like,
|
||||
// utf-8-dos, utf-8-linux, utf-8-mac. We treat all of these as utf-8: any
|
||||
// encoding starting with utf-8 is treated as utf-8.
|
||||
if (strncasecmp(encoding_start, "utf-8", 5) == 0) {
|
||||
if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
|
||||
// We don't need to do anything here because the default encoding is
|
||||
// already UTF-8. We'll just return.
|
||||
return;
|
||||
@ -4401,7 +4459,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
|
||||
// Next, we're going to loop through each of the encodings that we handle
|
||||
// explicitly. If we found one that we understand, we'll use that value.
|
||||
#define ENCODING(value, prebuilt) \
|
||||
if (width == sizeof(value) - 1 && strncasecmp(encoding_start, value, sizeof(value) - 1) == 0) { \
|
||||
if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
|
||||
parser->encoding = prebuilt; \
|
||||
parser->encoding_changed |= true; \
|
||||
if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
|
||||
@ -4866,7 +4924,8 @@ static yp_token_type_t
|
||||
lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_token_type_t type, yp_token_type_t modifier_type) {
|
||||
yp_lex_state_t last_state = parser->lex_state;
|
||||
|
||||
if (strncmp(parser->current.start, value, strlen(value)) == 0) {
|
||||
const size_t vlen = strlen(value);
|
||||
if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
|
||||
if (parser->lex_state & YP_LEX_STATE_FNAME) {
|
||||
lex_state_set(parser, YP_LEX_STATE_ENDFN);
|
||||
} else {
|
||||
@ -5275,7 +5334,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
|
||||
static yp_token_type_t
|
||||
lex_embdoc(yp_parser_t *parser) {
|
||||
// First, lex out the EMBDOC_BEGIN token.
|
||||
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
|
||||
const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
||||
|
||||
if (newline == NULL) {
|
||||
parser->current.end = parser->end;
|
||||
@ -5300,7 +5359,7 @@ lex_embdoc(yp_parser_t *parser) {
|
||||
// token here.
|
||||
if (strncmp(parser->current.end, "=end", 4) == 0 &&
|
||||
(parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
|
||||
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
|
||||
const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
||||
|
||||
if (newline == NULL) {
|
||||
parser->current.end = parser->end;
|
||||
@ -5320,7 +5379,7 @@ lex_embdoc(yp_parser_t *parser) {
|
||||
|
||||
// Otherwise, we'll parse until the end of the line and return a line of
|
||||
// embedded documentation.
|
||||
const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
|
||||
const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
|
||||
|
||||
if (newline == NULL) {
|
||||
parser->current.end = parser->end;
|
||||
@ -5466,9 +5525,9 @@ parser_lex(yp_parser_t *parser) {
|
||||
LEX(YP_TOKEN_EOF);
|
||||
|
||||
case '#': { // comments
|
||||
const char *ending = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
|
||||
const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
|
||||
while (ending && ending < parser->end && *ending != '\n') {
|
||||
ending = memchr(ending + 1, '\n', (size_t) (parser->end - ending));
|
||||
ending = next_newline(ending + 1, parser->end - ending);
|
||||
}
|
||||
|
||||
parser->current.end = ending == NULL ? parser->end : ending + 1;
|
||||
@ -5540,7 +5599,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// Otherwise we'll return a regular newline.
|
||||
if (next_content[0] == '#') {
|
||||
// Here we look for a "." or "&." following a "\n".
|
||||
const char *following = memchr(next_content, '\n', (size_t) (parser->end - next_content));
|
||||
const char *following = next_newline(next_content, parser->end - next_content);
|
||||
|
||||
while (following && (following < parser->end)) {
|
||||
following++;
|
||||
@ -5552,7 +5611,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
// If there is a comment, then we need to find the end of the
|
||||
// comment and continue searching from there.
|
||||
following = memchr(following, '\n', (size_t) (parser->end - following));
|
||||
following = next_newline(following, parser->end - following);
|
||||
}
|
||||
|
||||
// If the lex state was ignored, or we hit a '.' or a '&.',
|
||||
@ -5785,7 +5844,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
// = => =~ == === =begin
|
||||
case '=':
|
||||
if (current_token_starts_line(parser) && strncmp(parser->current.end, "begin", 5) == 0 && yp_char_is_whitespace(parser->current.end[5])) {
|
||||
if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
|
||||
yp_token_type_t type = lex_embdoc(parser);
|
||||
|
||||
if (type == YP_TOKEN_EOF) {
|
||||
@ -5848,19 +5907,21 @@ parser_lex(yp_parser_t *parser) {
|
||||
const char *ident_start = parser->current.end;
|
||||
size_t width = 0;
|
||||
|
||||
if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
|
||||
if (parser->current.end >= parser->end) {
|
||||
parser->current.end = end;
|
||||
} else if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
|
||||
parser->current.end = end;
|
||||
} else {
|
||||
if (quote == YP_HEREDOC_QUOTE_NONE) {
|
||||
parser->current.end += width;
|
||||
|
||||
while ((width = char_is_identifier(parser, parser->current.end))) {
|
||||
while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
|
||||
parser->current.end += width;
|
||||
}
|
||||
} else {
|
||||
// If we have quotes, then we're going to go until we find the
|
||||
// end quote.
|
||||
while (parser->current.end < parser->end && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
|
||||
while ((parser->current.end < parser->end) && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
|
||||
parser->current.end++;
|
||||
}
|
||||
}
|
||||
@ -5882,7 +5943,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
});
|
||||
|
||||
if (parser->heredoc_end == NULL) {
|
||||
const char *body_start = (const char *) memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
|
||||
const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
|
||||
|
||||
if (body_start == NULL) {
|
||||
// If there is no newline after the heredoc identifier, then
|
||||
@ -6465,13 +6526,13 @@ parser_lex(yp_parser_t *parser) {
|
||||
// Here we'll get a list of the places where strpbrk should break,
|
||||
// and then find the first one.
|
||||
const char *breakpoints = parser->lex_modes.current->as.list.breakpoints;
|
||||
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
|
||||
while (breakpoint != NULL) {
|
||||
switch (*breakpoint) {
|
||||
case '\0':
|
||||
// If we hit a null byte, skip directly past it.
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
break;
|
||||
case '\\': {
|
||||
// If we hit escapes, then we need to treat the next token
|
||||
@ -6492,7 +6553,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
|
||||
}
|
||||
|
||||
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||
break;
|
||||
}
|
||||
case ' ':
|
||||
@ -6517,7 +6578,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// that looked like an interpolated class or instance variable
|
||||
// like "#@" but wasn't actually. In this case we'll just skip
|
||||
// to the next breakpoint.
|
||||
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -6526,7 +6587,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
if (*breakpoint == parser->lex_modes.current->as.list.incrementor) {
|
||||
// If we've hit the incrementor, then we need to skip past it and
|
||||
// find the next breakpoint.
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
parser->lex_modes.current->as.list.nesting++;
|
||||
break;
|
||||
}
|
||||
@ -6537,7 +6598,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// If this terminator doesn't actually close the list, then we need
|
||||
// to continue on past it.
|
||||
if (parser->lex_modes.current->as.list.nesting > 0) {
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
parser->lex_modes.current->as.list.nesting--;
|
||||
break;
|
||||
}
|
||||
@ -6577,13 +6638,13 @@ parser_lex(yp_parser_t *parser) {
|
||||
// regular expression. We'll use strpbrk to find the first of these
|
||||
// characters.
|
||||
const char *breakpoints = parser->lex_modes.current->as.regexp.breakpoints;
|
||||
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
|
||||
while (breakpoint != NULL) {
|
||||
switch (*breakpoint) {
|
||||
case '\0':
|
||||
// If we hit a null byte, skip directly past it.
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
break;
|
||||
case '\\': {
|
||||
// If we hit escapes, then we need to treat the next token
|
||||
@ -6597,7 +6658,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
|
||||
}
|
||||
|
||||
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||
break;
|
||||
}
|
||||
case '#': {
|
||||
@ -6613,7 +6674,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// that looked like an interpolated class or instance variable
|
||||
// like "#@" but wasn't actually. In this case we'll just skip
|
||||
// to the next breakpoint.
|
||||
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -6622,7 +6683,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
if (*breakpoint == parser->lex_modes.current->as.regexp.incrementor) {
|
||||
// If we've hit the incrementor, then we need to skip past it and
|
||||
// find the next breakpoint.
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
parser->lex_modes.current->as.regexp.nesting++;
|
||||
break;
|
||||
}
|
||||
@ -6635,7 +6696,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
if (parser->lex_modes.current->as.regexp.terminator != '\n') {
|
||||
// If the terminator is not a newline, then we
|
||||
// can set the next breakpoint and continue.
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -6646,7 +6707,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
assert(*breakpoint == parser->lex_modes.current->as.regexp.terminator);
|
||||
|
||||
if (parser->lex_modes.current->as.regexp.nesting > 0) {
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
parser->lex_modes.current->as.regexp.nesting--;
|
||||
break;
|
||||
}
|
||||
@ -6694,7 +6755,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// These are the places where we need to split up the content of the
|
||||
// string. We'll use strpbrk to find the first of these characters.
|
||||
const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
|
||||
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
|
||||
while (breakpoint != NULL) {
|
||||
// If we hit the incrementor, then we'll increment then nesting and
|
||||
@ -6704,7 +6765,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
*breakpoint == parser->lex_modes.current->as.string.incrementor
|
||||
) {
|
||||
parser->lex_modes.current->as.string.nesting++;
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -6715,7 +6776,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// If this terminator doesn't actually close the string, then we need
|
||||
// to continue on past it.
|
||||
if (parser->lex_modes.current->as.string.nesting > 0) {
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
parser->lex_modes.current->as.string.nesting--;
|
||||
continue;
|
||||
}
|
||||
@ -6762,7 +6823,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
if (*breakpoint == '\n') {
|
||||
if (parser->heredoc_end == NULL) {
|
||||
yp_newline_list_append(&parser->newline_list, breakpoint);
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
continue;
|
||||
} else {
|
||||
parser->current.end = breakpoint + 1;
|
||||
@ -6774,7 +6835,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
switch (*breakpoint) {
|
||||
case '\0':
|
||||
// Skip directly past the null character.
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
break;
|
||||
case '\\': {
|
||||
// If we hit escapes, then we need to treat the next token
|
||||
@ -6789,7 +6850,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
|
||||
}
|
||||
|
||||
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||
break;
|
||||
}
|
||||
case '#': {
|
||||
@ -6802,7 +6863,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// looked like an interpolated class or instance variable like "#@"
|
||||
// but wasn't actually. In this case we'll just skip to the next
|
||||
// breakpoint.
|
||||
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -6844,7 +6905,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
start += yp_strspn_inline_whitespace(start, parser->end - start);
|
||||
}
|
||||
|
||||
if (strncmp(start, ident_start, ident_length) == 0) {
|
||||
if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
|
||||
bool matched = true;
|
||||
bool at_end = false;
|
||||
|
||||
@ -6888,13 +6949,13 @@ parser_lex(yp_parser_t *parser) {
|
||||
breakpoints[2] = '\0';
|
||||
}
|
||||
|
||||
const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
|
||||
while (breakpoint != NULL) {
|
||||
switch (*breakpoint) {
|
||||
case '\0':
|
||||
// Skip directly past the null character.
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
break;
|
||||
case '\n': {
|
||||
yp_newline_list_append(&parser->newline_list, breakpoint);
|
||||
@ -6939,7 +7000,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
// Otherwise we hit a newline and it wasn't followed by a
|
||||
// terminator, so we can continue parsing.
|
||||
breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
|
||||
break;
|
||||
}
|
||||
case '\\': {
|
||||
@ -6956,7 +7017,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
|
||||
}
|
||||
|
||||
breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||
breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -6970,7 +7031,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// that looked like an interpolated class or instance variable
|
||||
// like "#@" but wasn't actually. In this case we'll just skip
|
||||
// to the next breakpoint.
|
||||
breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -7007,7 +7068,7 @@ yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_tok
|
||||
ptrdiff_t length = content->end - content->start;
|
||||
assert(length >= 0);
|
||||
|
||||
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
|
||||
yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -7018,7 +7079,7 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
|
||||
ptrdiff_t length = content->end - content->start;
|
||||
assert(length >= 0);
|
||||
|
||||
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
|
||||
yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -7029,7 +7090,7 @@ yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
|
||||
ptrdiff_t length = content->end - content->start;
|
||||
assert(length >= 0);
|
||||
|
||||
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
|
||||
yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -7040,7 +7101,7 @@ yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openi
|
||||
ptrdiff_t length = content->end - content->start;
|
||||
assert(length >= 0);
|
||||
|
||||
yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
|
||||
yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -7505,10 +7566,10 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
|
||||
// the previous method name in, and append an =.
|
||||
size_t length = yp_string_length(&call->name);
|
||||
|
||||
char *name = malloc(length + 2);
|
||||
char *name = calloc(length + 2, sizeof(char));
|
||||
if (name == NULL) return NULL;
|
||||
|
||||
sprintf(name, "%.*s=", (int) length, yp_string_source(&call->name));
|
||||
yp_snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
|
||||
|
||||
// Now switch the name to the new string.
|
||||
yp_string_free(&call->name);
|
||||
@ -8954,9 +9015,11 @@ parse_string_part(yp_parser_t *parser) {
|
||||
|
||||
static yp_node_t *
|
||||
parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
|
||||
bool lex_string = lex_mode->mode == YP_LEX_STRING;
|
||||
bool lex_interpolation = lex_string && lex_mode->as.string.interpolation;
|
||||
yp_token_t opening = parser->previous;
|
||||
|
||||
if (lex_mode->mode != YP_LEX_STRING) {
|
||||
if (!lex_string) {
|
||||
if (next_state != YP_LEX_STATE_NONE) {
|
||||
lex_state_set(parser, next_state);
|
||||
}
|
||||
@ -8990,9 +9053,9 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
|
||||
}
|
||||
|
||||
// If we weren't in a string in the previous check then we have to be now.
|
||||
assert(lex_mode->mode == YP_LEX_STRING);
|
||||
assert(lex_string);
|
||||
|
||||
if (lex_mode->as.string.interpolation) {
|
||||
if (lex_interpolation) {
|
||||
yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
|
||||
|
||||
while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
|
||||
@ -9043,9 +9106,10 @@ parse_undef_argument(yp_parser_t *parser) {
|
||||
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
|
||||
}
|
||||
case YP_TOKEN_SYMBOL_BEGIN: {
|
||||
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
|
||||
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
|
||||
parser_lex(parser);
|
||||
return parse_symbol(parser, lex_mode, YP_LEX_STATE_NONE);
|
||||
|
||||
return parse_symbol(parser, &lex_mode, YP_LEX_STATE_NONE);
|
||||
}
|
||||
default:
|
||||
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a bare word or symbol argument.");
|
||||
@ -9075,10 +9139,10 @@ parse_alias_argument(yp_parser_t *parser, bool first) {
|
||||
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
|
||||
}
|
||||
case YP_TOKEN_SYMBOL_BEGIN: {
|
||||
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
|
||||
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
|
||||
parser_lex(parser);
|
||||
|
||||
return parse_symbol(parser, lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
|
||||
return parse_symbol(parser, &lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
|
||||
}
|
||||
case YP_TOKEN_BACK_REFERENCE:
|
||||
parser_lex(parser);
|
||||
@ -9177,7 +9241,7 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
|
||||
common_whitespace = cur_whitespace;
|
||||
}
|
||||
|
||||
cur_char = memchr(cur_char + 1, '\n', (size_t) (parser->end - (cur_char + 1)));
|
||||
cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
|
||||
if (cur_char) cur_char++;
|
||||
}
|
||||
}
|
||||
@ -9252,7 +9316,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
|
||||
|
||||
// At this point we have dedented all that we need to, so we need to find
|
||||
// the next newline.
|
||||
const char *breakpoint = memchr(source_cursor, '\n', (size_t) (source_end - source_cursor));
|
||||
const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);
|
||||
|
||||
if (breakpoint == NULL) {
|
||||
// If there isn't another newline, then we can just move the rest of the
|
||||
@ -9293,92 +9357,106 @@ parse_pattern_constant_path(yp_parser_t *parser, yp_node_t *node) {
|
||||
// If there is a [ or ( that follows, then this is part of a larger pattern
|
||||
// expression. We'll parse the inner pattern here, then modify the returned
|
||||
// inner pattern with our constant path attached.
|
||||
if (match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
|
||||
yp_token_t opening;
|
||||
yp_token_t closing;
|
||||
yp_node_t *inner = NULL;
|
||||
|
||||
if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
|
||||
opening = parser->previous;
|
||||
|
||||
accept(parser, YP_TOKEN_NEWLINE);
|
||||
|
||||
if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
|
||||
inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
|
||||
accept(parser, YP_TOKEN_NEWLINE);
|
||||
|
||||
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
|
||||
}
|
||||
|
||||
closing = parser->previous;
|
||||
} else {
|
||||
parser_lex(parser);
|
||||
opening = parser->previous;
|
||||
|
||||
if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
|
||||
inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
|
||||
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
|
||||
}
|
||||
|
||||
closing = parser->previous;
|
||||
}
|
||||
|
||||
if (inner) {
|
||||
// Now that we have the inner pattern, check to see if it's an array, find,
|
||||
// or hash pattern. If it is, then we'll attach our constant path to it. If
|
||||
// it's not, then we'll create an array pattern.
|
||||
switch (inner->type) {
|
||||
case YP_NODE_ARRAY_PATTERN_NODE: {
|
||||
yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *)inner;
|
||||
pattern_node->base.location.start = node->location.start;
|
||||
pattern_node->base.location.end = closing.end;
|
||||
|
||||
pattern_node->constant = node;
|
||||
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
|
||||
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
|
||||
|
||||
node = (yp_node_t *)pattern_node;
|
||||
break;
|
||||
}
|
||||
case YP_NODE_FIND_PATTERN_NODE: {
|
||||
yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
|
||||
pattern_node->base.location.start = node->location.start;
|
||||
pattern_node->base.location.end = closing.end;
|
||||
|
||||
pattern_node->constant = node;
|
||||
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
|
||||
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
|
||||
|
||||
node = (yp_node_t *) pattern_node;
|
||||
break;
|
||||
}
|
||||
case YP_NODE_HASH_PATTERN_NODE: {
|
||||
yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *)inner;
|
||||
pattern_node->base.location.start = node->location.start;
|
||||
pattern_node->base.location.end = closing.end;
|
||||
|
||||
pattern_node->constant = node;
|
||||
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
|
||||
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
|
||||
|
||||
node = (yp_node_t *) pattern_node;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
|
||||
yp_array_pattern_node_requireds_append(pattern_node, inner);
|
||||
node = (yp_node_t *)pattern_node;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If there was no inner pattern, then we have something like Foo() or
|
||||
// Foo[]. In that case we'll create an array pattern with no requireds.
|
||||
node = (yp_node_t *)yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
|
||||
}
|
||||
if (!match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
|
||||
return node;
|
||||
}
|
||||
|
||||
return node;
|
||||
yp_token_t opening;
|
||||
yp_token_t closing;
|
||||
yp_node_t *inner = NULL;
|
||||
|
||||
if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
|
||||
opening = parser->previous;
|
||||
accept(parser, YP_TOKEN_NEWLINE);
|
||||
|
||||
if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
|
||||
inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
|
||||
accept(parser, YP_TOKEN_NEWLINE);
|
||||
expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
|
||||
}
|
||||
|
||||
closing = parser->previous;
|
||||
} else {
|
||||
parser_lex(parser);
|
||||
opening = parser->previous;
|
||||
|
||||
if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
|
||||
inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
|
||||
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
|
||||
}
|
||||
|
||||
closing = parser->previous;
|
||||
}
|
||||
|
||||
if (!inner) {
|
||||
// If there was no inner pattern, then we have something like Foo() or
|
||||
// Foo[]. In that case we'll create an array pattern with no requireds.
|
||||
return (yp_node_t *) yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
|
||||
}
|
||||
|
||||
// Now that we have the inner pattern, check to see if it's an array, find,
|
||||
// or hash pattern. If it is, then we'll attach our constant path to it if
|
||||
// it doesn't already have a constant. If it's not one of those node types
|
||||
// or it does have a constant, then we'll create an array pattern.
|
||||
switch (inner->type) {
|
||||
case YP_NODE_ARRAY_PATTERN_NODE: {
|
||||
yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner;
|
||||
|
||||
if (pattern_node->constant == NULL) {
|
||||
pattern_node->base.location.start = node->location.start;
|
||||
pattern_node->base.location.end = closing.end;
|
||||
|
||||
pattern_node->constant = node;
|
||||
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
|
||||
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
|
||||
|
||||
return (yp_node_t *) pattern_node;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case YP_NODE_FIND_PATTERN_NODE: {
|
||||
yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
|
||||
|
||||
if (pattern_node->constant == NULL) {
|
||||
pattern_node->base.location.start = node->location.start;
|
||||
pattern_node->base.location.end = closing.end;
|
||||
|
||||
pattern_node->constant = node;
|
||||
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
|
||||
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
|
||||
|
||||
return (yp_node_t *) pattern_node;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case YP_NODE_HASH_PATTERN_NODE: {
|
||||
yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *) inner;
|
||||
|
||||
if (pattern_node->constant == NULL) {
|
||||
pattern_node->base.location.start = node->location.start;
|
||||
pattern_node->base.location.end = closing.end;
|
||||
|
||||
pattern_node->constant = node;
|
||||
pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
|
||||
pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
|
||||
|
||||
return (yp_node_t *) pattern_node;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// If we got here, then we didn't return one of the inner patterns by
|
||||
// attaching its constant. In this case we'll create an array pattern and
|
||||
// attach our constant to it.
|
||||
yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
|
||||
yp_array_pattern_node_requireds_append(pattern_node, inner);
|
||||
return (yp_node_t *) pattern_node;
|
||||
}
|
||||
|
||||
// Parse a rest pattern.
|
||||
@ -9897,8 +9975,6 @@ parse_pattern(yp_parser_t *parser, bool top_pattern, const char *message) {
|
||||
// Parse an expression that begins with the previous node that we just lexed.
|
||||
static inline yp_node_t *
|
||||
parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
||||
yp_lex_mode_t *lex_mode = parser->lex_modes.current;
|
||||
|
||||
switch (parser->current.type) {
|
||||
case YP_TOKEN_BRACKET_LEFT_ARRAY: {
|
||||
parser_lex(parser);
|
||||
@ -11015,7 +11091,10 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
||||
lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM);
|
||||
parser_lex(parser);
|
||||
name = parse_undef_argument(parser);
|
||||
if (name->type == YP_NODE_MISSING_NODE) break;
|
||||
if (name->type == YP_NODE_MISSING_NODE) {
|
||||
yp_node_destroy(parser, name);
|
||||
break;
|
||||
}
|
||||
|
||||
yp_undef_node_append(undef, name);
|
||||
}
|
||||
@ -11043,6 +11122,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
||||
receiver = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected expression after `not`.");
|
||||
|
||||
if (!parser->recovering) {
|
||||
accept(parser, YP_TOKEN_NEWLINE);
|
||||
expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after 'not' expression.");
|
||||
arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
|
||||
}
|
||||
@ -11727,9 +11807,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
||||
return (yp_node_t *) node;
|
||||
}
|
||||
case YP_TOKEN_STRING_BEGIN: {
|
||||
assert(parser->lex_modes.current->mode == YP_LEX_STRING);
|
||||
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
|
||||
|
||||
yp_token_t opening = parser->current;
|
||||
parser_lex(parser);
|
||||
|
||||
yp_token_t opening = parser->previous;
|
||||
yp_node_t *node;
|
||||
|
||||
if (accept(parser, YP_TOKEN_STRING_END)) {
|
||||
@ -11754,7 +11837,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
||||
};
|
||||
|
||||
return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
|
||||
} else if (!lex_mode->as.string.interpolation) {
|
||||
} else if (!lex_interpolation) {
|
||||
// If we don't accept interpolation then we expect the string to start
|
||||
// with a single string content node.
|
||||
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
|
||||
@ -11858,9 +11941,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
||||
return node;
|
||||
}
|
||||
}
|
||||
case YP_TOKEN_SYMBOL_BEGIN:
|
||||
case YP_TOKEN_SYMBOL_BEGIN: {
|
||||
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
|
||||
parser_lex(parser);
|
||||
return parse_symbol(parser, lex_mode, YP_LEX_STATE_END);
|
||||
|
||||
return parse_symbol(parser, &lex_mode, YP_LEX_STATE_END);
|
||||
}
|
||||
default:
|
||||
if (context_recoverable(parser, &parser->current)) {
|
||||
parser->recovering = true;
|
||||
@ -12482,82 +12568,8 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
|
||||
|
||||
return path;
|
||||
}
|
||||
case YP_TOKEN_AMPERSAND:
|
||||
case YP_TOKEN_BACKTICK:
|
||||
case YP_TOKEN_BANG:
|
||||
case YP_TOKEN_BANG_EQUAL:
|
||||
case YP_TOKEN_BANG_TILDE:
|
||||
case YP_TOKEN_CARET:
|
||||
case YP_TOKEN_EQUAL_EQUAL:
|
||||
case YP_TOKEN_EQUAL_EQUAL_EQUAL:
|
||||
case YP_TOKEN_EQUAL_TILDE:
|
||||
case YP_TOKEN_GREATER:
|
||||
case YP_TOKEN_GREATER_EQUAL:
|
||||
case YP_TOKEN_GREATER_GREATER:
|
||||
case YP_TOKEN_HEREDOC_START:
|
||||
case YP_TOKEN_IGNORED_NEWLINE:
|
||||
case YP_TOKEN_KEYWORD_ALIAS:
|
||||
case YP_TOKEN_KEYWORD_AND:
|
||||
case YP_TOKEN_KEYWORD_BEGIN:
|
||||
case YP_TOKEN_KEYWORD_BEGIN_UPCASE:
|
||||
case YP_TOKEN_KEYWORD_BREAK:
|
||||
case YP_TOKEN_KEYWORD_CASE:
|
||||
case YP_TOKEN_KEYWORD_CLASS:
|
||||
case YP_TOKEN_KEYWORD_DEF:
|
||||
case YP_TOKEN_KEYWORD_DEFINED:
|
||||
case YP_TOKEN_KEYWORD_DO:
|
||||
case YP_TOKEN_KEYWORD_ELSE:
|
||||
case YP_TOKEN_KEYWORD_ELSIF:
|
||||
case YP_TOKEN_KEYWORD_END:
|
||||
case YP_TOKEN_KEYWORD_END_UPCASE:
|
||||
case YP_TOKEN_KEYWORD_ENSURE:
|
||||
case YP_TOKEN_KEYWORD_FALSE:
|
||||
case YP_TOKEN_KEYWORD_FOR:
|
||||
case YP_TOKEN_KEYWORD_IF:
|
||||
case YP_TOKEN_KEYWORD_IN:
|
||||
case YP_TOKEN_KEYWORD_NEXT:
|
||||
case YP_TOKEN_KEYWORD_NIL:
|
||||
case YP_TOKEN_KEYWORD_NOT:
|
||||
case YP_TOKEN_KEYWORD_OR:
|
||||
case YP_TOKEN_KEYWORD_REDO:
|
||||
case YP_TOKEN_KEYWORD_RESCUE:
|
||||
case YP_TOKEN_KEYWORD_RETRY:
|
||||
case YP_TOKEN_KEYWORD_RETURN:
|
||||
case YP_TOKEN_KEYWORD_SELF:
|
||||
case YP_TOKEN_KEYWORD_SUPER:
|
||||
case YP_TOKEN_KEYWORD_THEN:
|
||||
case YP_TOKEN_KEYWORD_TRUE:
|
||||
case YP_TOKEN_KEYWORD_UNDEF:
|
||||
case YP_TOKEN_KEYWORD_UNLESS:
|
||||
case YP_TOKEN_KEYWORD_UNTIL:
|
||||
case YP_TOKEN_KEYWORD_WHEN:
|
||||
case YP_TOKEN_KEYWORD_WHILE:
|
||||
case YP_TOKEN_KEYWORD_YIELD:
|
||||
case YP_TOKEN_KEYWORD___ENCODING__:
|
||||
case YP_TOKEN_KEYWORD___FILE__:
|
||||
case YP_TOKEN_KEYWORD___LINE__:
|
||||
case YP_TOKEN_LESS:
|
||||
case YP_TOKEN_LESS_EQUAL:
|
||||
case YP_TOKEN_LESS_EQUAL_GREATER:
|
||||
case YP_TOKEN_LESS_LESS:
|
||||
case YP_TOKEN_MINUS:
|
||||
case YP_TOKEN_PERCENT:
|
||||
case YP_TOKEN_PERCENT_LOWER_I:
|
||||
case YP_TOKEN_PERCENT_LOWER_W:
|
||||
case YP_TOKEN_PERCENT_LOWER_X:
|
||||
case YP_TOKEN_PERCENT_UPPER_I:
|
||||
case YP_TOKEN_PERCENT_UPPER_W:
|
||||
case YP_TOKEN_PIPE:
|
||||
case YP_TOKEN_PLUS:
|
||||
case YP_TOKEN_REGEXP_BEGIN:
|
||||
case YP_TOKEN_SLASH:
|
||||
case YP_TOKEN_STAR:
|
||||
case YP_TOKEN_STAR_STAR:
|
||||
case YP_TOKEN_TILDE:
|
||||
case YP_TOKEN_UCOLON_COLON:
|
||||
case YP_TOKEN_UDOT_DOT:
|
||||
case YP_TOKEN_UDOT_DOT_DOT:
|
||||
case YP_TOKEN___END__:
|
||||
case YP_CASE_OPERATOR:
|
||||
case YP_CASE_KEYWORD:
|
||||
case YP_TOKEN_IDENTIFIER: {
|
||||
parser_lex(parser);
|
||||
|
||||
@ -12805,7 +12817,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
|
||||
} else if (size >= 2 && source[0] == '#' && source[1] == '!') {
|
||||
// If the first two bytes of the source are a shebang, then we'll indicate
|
||||
// that the encoding comment is at the end of the shebang.
|
||||
const char *encoding_comment_start = memchr(source, '\n', size);
|
||||
const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
|
||||
if (encoding_comment_start) {
|
||||
parser->encoding_comment_start = encoding_comment_start + 1;
|
||||
}
|
||||
@ -12891,6 +12903,3 @@ yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
|
||||
#undef YP_CASE_KEYWORD
|
||||
#undef YP_CASE_OPERATOR
|
||||
#undef YP_CASE_WRITABLE
|
||||
#undef YP_STRINGIZE
|
||||
#undef YP_STRINGIZE0
|
||||
#undef YP_VERSION_MACRO
|
||||
|
44
yarp/yarp.h
44
yarp/yarp.h
@ -2,19 +2,6 @@
|
||||
#define YARP_H
|
||||
|
||||
#include "yarp/defines.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#ifndef _WIN32
|
||||
#include <strings.h>
|
||||
#endif
|
||||
|
||||
#include "yarp/missing.h"
|
||||
#include "yarp/ast.h"
|
||||
#include "yarp/diagnostic.h"
|
||||
#include "yarp/node.h"
|
||||
@ -24,17 +11,26 @@
|
||||
#include "yarp/unescape.h"
|
||||
#include "yarp/util/yp_buffer.h"
|
||||
#include "yarp/util/yp_char.h"
|
||||
#include "yarp/util/yp_memchr.h"
|
||||
#include "yarp/util/yp_strpbrk.h"
|
||||
|
||||
#define YP_VERSION_MAJOR 0
|
||||
#define YP_VERSION_MINOR 4
|
||||
#define YP_VERSION_PATCH 0
|
||||
#include <assert.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <strings.h>
|
||||
#endif
|
||||
|
||||
void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
||||
|
||||
void yp_print_node(yp_parser_t *parser, yp_node_t *node);
|
||||
|
||||
// Returns the YARP version and notably the serialization format
|
||||
// The YARP version and the serialization format.
|
||||
YP_EXPORTED_FUNCTION const char * yp_version(void);
|
||||
|
||||
// Initialize a parser with the given start and end pointers.
|
||||
@ -57,20 +53,6 @@ YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
|
||||
// Parse the Ruby source associated with the given parser and return the tree.
|
||||
YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
|
||||
|
||||
// Deallocate a node and all of its children.
|
||||
YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
|
||||
|
||||
// This struct stores the information gathered by the yp_node_memsize function.
|
||||
// It contains both the memory footprint and additionally metadata about the
|
||||
// shape of the tree.
|
||||
typedef struct {
|
||||
size_t memsize;
|
||||
size_t node_count;
|
||||
} yp_memsize_t;
|
||||
|
||||
// Calculates the memory footprint of a given node.
|
||||
YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
|
||||
|
||||
// Pretty-prints the AST represented by the given node to the given buffer.
|
||||
YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user