Manual YARP resync

Merged: https://github.com/ruby/ruby/pull/8007
2023-06-30 14:30:24 -04:00 · 2023-06-30 14:30:24 -04:00 · bfb933371d · 2023-07-05 20:59:16 +00:00
commit bfb933371d
parent 6f9d1b4b0f
77 changed files with 5222 additions and 5751 deletions
--- a/lib/yarp.rb
+++ b/lib/yarp.rb
@@ -1,17 +1,79 @@
 # frozen_string_literal: true
 module YARP
-  # This represents a location in the source corresponding to a node or token.
+  # This represents a source of Ruby code that has been parsed. It is used in
-  class Location
+  # conjunction with locations to allow them to resolve line numbers and source
-    attr_reader :start_offset, :length
+  # ranges.
  class Source
    attr_reader :source, :offsets
-    def initialize(start_offset, length)
+    def initialize(source, offsets)
      @source = source
      @offsets = offsets
    end
    # Returns the bytes of the source starting at the byte offset and spanning
    # the given length, as produced by String#byteslice.
    def slice(offset, length)
      code = source
      code.byteslice(offset, length)
    end
    # Maps a byte offset to an index into the offsets table: the index of the
    # first entry greater than the value, or the size of the table when no
    # entry is greater.
    def line(value)
      index = offsets.bsearch_index { |line_start| line_start > value }
      index.nil? ? offsets.length : index
    end
    # Computes the distance in bytes between the value and the offsets entry
    # that precedes the index returned by #line for that value.
    def column(value)
      preceding = offsets[line(value) - 1]
      value - preceding
    end
  end
  # This represents a location in the source.
  class Location
    # A Source object that is used to determine more information from the given
    # offset and length.
    private attr_reader :source
    # The byte offset from the beginning of the source where this location
    # starts.
    attr_reader :start_offset
    # The length of this location in bytes.
    attr_reader :length
    # Stores the source object, the starting byte offset, and the byte length
    # that together describe this location.
    def initialize(source, start_offset, length)
      @source, @start_offset, @length = source, start_offset, length
    end
    # The source code that this location represents.
    def slice
      source.slice(start_offset, length)
    end
    # The byte offset from the beginning of the source where this location ends.
    def end_offset
-      @start_offset + @length
+      start_offset + length
    end
    # The line number where this location starts.
    def start_line
      source.line(start_offset)
    end
    # The line number where this location ends.
    def end_line
      source.line(end_offset - 1)
    end
    # The column number in bytes where this location starts from the start of
    # the line.
    def start_column
      source.column(start_offset)
    end
    # The column number in bytes where this location ends from the start of the
    # line.
    def end_column
      source.column(end_offset - 1)
    end
    def deconstruct_keys(keys)
@@ -101,21 +163,12 @@ module YARP
  # This represents a token from the Ruby source.
  class Token
-    attr_reader :type, :value, :start_offset, :length
+    attr_reader :type, :value, :location
-    def initialize(type, value, start_offset, length)
+    def initialize(type, value, location)
      @type = type
      @value = value
-      @start_offset = start_offset
+      @location = location
      @length = length
    end
    def end_offset
      @start_offset + @length
    end
    def location
      Location.new(@start_offset, @length)
    end
    def deconstruct_keys(keys)
@@ -143,20 +196,12 @@ module YARP
  # This represents a node in the tree.
  class Node
-    attr_reader :start_offset, :length
+    attr_reader :location
    def end_offset
      @start_offset + @length
    end
    def location
      Location.new(@start_offset, @length)
    end
    def pretty_print(q)
      q.group do
        q.text(self.class.name.split("::").last)
-        self.location.pretty_print(q)
+        location.pretty_print(q)
        q.text("(")
        q.nest(2) do
          deconstructed = deconstruct_keys([])
@@ -171,67 +216,10 @@ module YARP
    end
  end
  # A class that knows how to walk down the tree. None of the individual visit
  # methods are implemented on this visitor, so it forces the consumer to
  # implement each one that they need. For a default implementation that
  # continues walking the tree, see the Visitor class.
  class BasicVisitor
    # Asks the node to accept this visitor and returns whatever accept
    # returns. A nil node is skipped and yields nil.
    def visit(node)
      return if node.nil?
      node.accept(self)
    end
    # Visits every entry of the given collection in order and returns the
    # array of results.
    def visit_all(nodes)
      nodes.map { |child| visit(child) }
    end
    # Visits each of the entries returned by the node's child_nodes method.
    def visit_child_nodes(node)
      children = node.child_nodes
      visit_all(children)
    end
  end
  # This lexes with the Ripper lex. It drops any space events but otherwise
  # returns the same tokens.
  # [raises SyntaxError] if the syntax in source is invalid
  def self.lex_ripper(source)
    # Decides whether the current token's text should be appended to the last
    # kept token instead of being emitted on its own.
    mergeable = lambda do |kept, current|
      case current[1]
      when :on_tstring_content
        kept[1] == :on_tstring_content && current[2].start_with?("\#$", "\#@")
      when :on_words_sep
        kept[1] == :on_words_sep
      else
        false
      end
    end
    last_kept = []
    Ripper.lex(source, raise_errors: true).each_with_object([]) do |current, tokens|
      # Space events are dropped entirely and never become the last kept token.
      next if current[1] == :on_sp
      if mergeable.call(last_kept, current)
        last_kept[2] << current[2]
      else
        tokens << current
        last_kept = current
      end
    end
  end
  # Load the serialized AST using the source as a reference into a tree.
  def self.load(source, serialized)
    Serialize.load(source, serialized)
  end
  # Hands the source and the optional filepath (nil when omitted) to _parse
  # and returns its result.
  def self.parse(source, filepath = nil)
    _parse(source, filepath)
  end
 end
 require_relative "yarp/lex_compat"
@@ -240,9 +228,3 @@ require_relative "yarp/ripper_compat"
 require_relative "yarp/serialize"
 require_relative "yarp/pack"
 require "yarp.so"
 module YARP
  class << self
    private :_parse
  end
 end
--- a/lib/yarp/lex_compat.rb
+++ b/lib/yarp/lex_compat.rb
@@ -534,12 +534,11 @@ module YARP
      end
    end
-    attr_reader :source, :offsets, :filepath
+    attr_reader :source, :filepath
    def initialize(source, filepath = "")
      @source = source
      @filepath = filepath || ""
      @offsets = find_offsets(source)
    end
    def result
@@ -561,7 +560,8 @@ module YARP
      result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
      result_value.each_with_index do |(token, lex_state), index|
-        (lineno, column) = find_location(token.location.start_offset)
+        lineno = token.location.start_line
        column = token.location.start_column
        column -= index == 0 ? 6 : 3 if bom && lineno == 1
        event = RIPPER.fetch(token.type)
@@ -702,38 +702,6 @@ module YARP
      ParseResult.new(tokens, result.comments, result.errors, result.warnings)
    end
    private
    # YARP keeps locations around in the form of ranges of byte offsets from the
    # start of the file. Ripper keeps locations around in the form of line and
    # column numbers. To match the output, we keep a cache of the offsets at the
    # beginning of each line.
    def find_offsets(source)
      # Start from offset 0 and append the running byte total after each line,
      # so the final entry equals the byte size of the whole source.
      source.each_line.each_with_object([0]) do |line, offsets|
        offsets << (offsets.last + line.bytesize)
      end
    end
    # Given a byte offset, find the line number and column number that it maps
    # to. We use a binary search over the cached offsets to find the line number
    # that the offset is on, and then subtract the offset of the previous line
    # to find the column number.
    def find_location(value)
      index = offsets.bsearch_index { |offset| offset > value }
      # No cached offset exceeds the value: report the last index of the table
      # and measure the column from the final cached offset.
      return [offsets.length - 1, value - offsets.last] if index.nil?
      [index, value - offsets[index - 1]]
    end
  end
  # The constant that wraps the behavior of the lexer to match Ripper's output
@@ -746,4 +714,39 @@ module YARP
  def self.lex_compat(source, filepath = "")
    LexCompat.new(source, filepath).result
  end
  # This lexes with the Ripper lex. It drops any space events but otherwise
  # returns the same tokens. Raises SyntaxError if the syntax in source is
  # invalid.
  def self.lex_ripper(source)
    # Decides whether the current token's text should be appended to the last
    # kept token instead of being emitted on its own.
    mergeable = lambda do |kept, current|
      case current[1]
      when :on_tstring_content
        kept[1] == :on_tstring_content && current[2].start_with?("\#$", "\#@")
      when :on_words_sep
        kept[1] == :on_words_sep
      else
        false
      end
    end
    last_kept = []
    Ripper.lex(source, raise_errors: true).each_with_object([]) do |current, tokens|
      # Space events are dropped entirely and never become the last kept token.
      next if current[1] == :on_sp
      if mergeable.call(last_kept, current)
        last_kept[2] << current[2]
      else
        tokens << current
        last_kept = current
      end
    end
  end
 end
--- a/lib/yarp/node.rb
+++ b/lib/yarp/node.rb
--- a/lib/yarp/serialize.rb
+++ b/lib/yarp/serialize.rb
@@ -9,26 +9,30 @@ require "stringio"
 module YARP
  module Serialize
-    def self.load(source, serialized)
+    def self.load(input, serialized)
      io = StringIO.new(serialized)
      io.set_encoding(Encoding::BINARY)
-      Loader.new(source, serialized, io).load
+      Loader.new(input, serialized, io).load
    end
    class Loader
-      attr_reader :encoding, :source, :serialized, :io
+      attr_reader :encoding, :input, :serialized, :io
-      attr_reader :constant_pool_offset, :constant_pool
+      attr_reader :constant_pool_offset, :constant_pool, :source
-      def initialize(source, serialized, io)
+      def initialize(input, serialized, io)
        @encoding = Encoding::UTF_8
-        @source = source.dup
+        @input = input.dup
        @serialized = serialized
        @io = io
        @constant_pool_offset = nil
        @constant_pool = nil
        offsets = [0]
        input.b.scan("\n") { offsets << $~.end(0) }
        @source = Source.new(input, offsets)
      end
      def load
@@ -36,7 +40,7 @@ module YARP
        io.read(3).unpack("C3") => [0, 4, 0]
        @encoding = Encoding.find(io.read(load_varint))
-        @source = source.force_encoding(@encoding).freeze
+        @input = input.force_encoding(@encoding).freeze
        @constant_pool_offset = io.read(4).unpack1("L")
        @constant_pool = Array.new(load_varint, nil)
@@ -78,7 +82,7 @@ module YARP
      end
      def load_location
-        Location.new(load_varint, load_varint)
+        Location.new(source, load_varint, load_varint)
      end
      def load_optional_location
@@ -95,7 +99,7 @@ module YARP
          start = serialized.unpack1("L", offset: offset)
          length = serialized.unpack1("L", offset: offset + 4)
-          constant = source.byteslice(start, length).to_sym
+          constant = input.byteslice(start, length).to_sym
          constant_pool[index] = constant
        end
@@ -104,262 +108,262 @@ module YARP
      def load_node
        type = io.getbyte
-        start_offset, length = load_varint, load_varint
+        location = load_location
        case type
        when 1 then
-          AliasNode.new(load_node, load_node, load_location, start_offset, length)
+          AliasNode.new(load_node, load_node, load_location, location)
        when 2 then
-          AlternationPatternNode.new(load_node, load_node, load_location, start_offset, length)
+          AlternationPatternNode.new(load_node, load_node, load_location, location)
        when 3 then
-          AndNode.new(load_node, load_node, load_location, start_offset, length)
+          AndNode.new(load_node, load_node, load_location, location)
        when 4 then
-          ArgumentsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
+          ArgumentsNode.new(Array.new(load_varint) { load_node }, location)
        when 5 then
-          ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
+          ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
        when 6 then
-          ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
+          ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
        when 7 then
-          AssocNode.new(load_node, load_optional_node, load_optional_location, start_offset, length)
+          AssocNode.new(load_node, load_optional_node, load_optional_location, location)
        when 8 then
-          AssocSplatNode.new(load_optional_node, load_location, start_offset, length)
+          AssocSplatNode.new(load_optional_node, load_location, location)
        when 9 then
-          BackReferenceReadNode.new(start_offset, length)
+          BackReferenceReadNode.new(location)
        when 10 then
-          BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
+          BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, location)
        when 11 then
-          BlockArgumentNode.new(load_optional_node, load_location, start_offset, length)
+          BlockArgumentNode.new(load_optional_node, load_location, location)
        when 12 then
-          BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, start_offset, length)
+          BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, location)
        when 13 then
-          BlockParameterNode.new(load_optional_location, load_location, start_offset, length)
+          BlockParameterNode.new(load_optional_location, load_location, location)
        when 14 then
-          BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, start_offset, length)
+          BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, location)
        when 15 then
-          BreakNode.new(load_optional_node, load_location, start_offset, length)
+          BreakNode.new(load_optional_node, load_location, location)
        when 16 then
-          CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, start_offset, length)
+          CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, location)
        when 17 then
-          CallOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
+          CallOperatorAndWriteNode.new(load_node, load_location, load_node, location)
        when 18 then
-          CallOperatorOrWriteNode.new(load_node, load_node, load_location, start_offset, length)
+          CallOperatorOrWriteNode.new(load_node, load_node, load_location, location)
        when 19 then
-          CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
+          CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
        when 20 then
-          CapturePatternNode.new(load_node, load_node, load_location, start_offset, length)
+          CapturePatternNode.new(load_node, load_node, load_location, location)
        when 21 then
-          CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, start_offset, length)
+          CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, location)
        when 22 then
-          ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, start_offset, length)
+          ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, location)
        when 23 then
-          ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
        when 24 then
-          ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
        when 25 then
-          ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 26 then
-          ClassVariableReadNode.new(start_offset, length)
+          ClassVariableReadNode.new(location)
        when 27 then
-          ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
+          ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
        when 28 then
-          ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, location)
        when 29 then
-          ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, location)
        when 30 then
-          ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 31 then
-          ConstantPathNode.new(load_optional_node, load_node, load_location, start_offset, length)
+          ConstantPathNode.new(load_optional_node, load_node, load_location, location)
        when 32 then
-          ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
+          ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, location)
        when 33 then
-          ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, start_offset, length)
+          ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, location)
        when 34 then
-          ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
+          ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
        when 35 then
-          ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, start_offset, length)
+          ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, location)
        when 36 then
-          ConstantReadNode.new(start_offset, length)
+          ConstantReadNode.new(location)
        when 37 then
          load_serialized_length
-          DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, start_offset, length)
+          DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, location)
        when 38 then
-          DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, start_offset, length)
+          DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, location)
        when 39 then
-          ElseNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
+          ElseNode.new(load_location, load_optional_node, load_optional_location, location)
        when 40 then
-          EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, start_offset, length)
+          EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, location)
        when 41 then
-          EmbeddedVariableNode.new(load_location, load_node, start_offset, length)
+          EmbeddedVariableNode.new(load_location, load_node, location)
        when 42 then
-          EnsureNode.new(load_location, load_optional_node, load_location, start_offset, length)
+          EnsureNode.new(load_location, load_optional_node, load_location, location)
        when 43 then
-          FalseNode.new(start_offset, length)
+          FalseNode.new(location)
        when 44 then
-          FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, start_offset, length)
+          FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, location)
        when 45 then
-          FloatNode.new(start_offset, length)
+          FloatNode.new(location)
        when 46 then
-          ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, start_offset, length)
+          ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, location)
        when 47 then
-          ForwardingArgumentsNode.new(start_offset, length)
+          ForwardingArgumentsNode.new(location)
        when 48 then
-          ForwardingParameterNode.new(start_offset, length)
+          ForwardingParameterNode.new(location)
        when 49 then
-          ForwardingSuperNode.new(load_optional_node, start_offset, length)
+          ForwardingSuperNode.new(load_optional_node, location)
        when 50 then
-          GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
        when 51 then
-          GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
        when 52 then
-          GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 53 then
-          GlobalVariableReadNode.new(start_offset, length)
+          GlobalVariableReadNode.new(location)
        when 54 then
-          GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, start_offset, length)
+          GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, location)
        when 55 then
-          HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
+          HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
        when 56 then
-          HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
+          HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, location)
        when 57 then
-          IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
+          IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
        when 58 then
-          ImaginaryNode.new(load_node, start_offset, length)
+          ImaginaryNode.new(load_node, location)
        when 59 then
-          InNode.new(load_node, load_optional_node, load_location, load_optional_location, start_offset, length)
+          InNode.new(load_node, load_optional_node, load_location, load_optional_location, location)
        when 60 then
-          InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
        when 61 then
-          InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
        when 62 then
-          InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 63 then
-          InstanceVariableReadNode.new(start_offset, length)
+          InstanceVariableReadNode.new(location)
        when 64 then
-          InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
+          InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
        when 65 then
-          IntegerNode.new(start_offset, length)
+          IntegerNode.new(location)
        when 66 then
-          InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, start_offset, length)
+          InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, location)
        when 67 then
-          InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
+          InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
        when 68 then
-          InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
+          InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
        when 69 then
-          InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
+          InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
        when 70 then
-          KeywordHashNode.new(Array.new(load_varint) { load_node }, start_offset, length)
+          KeywordHashNode.new(Array.new(load_varint) { load_node }, location)
        when 71 then
-          KeywordParameterNode.new(load_location, load_optional_node, start_offset, length)
+          KeywordParameterNode.new(load_location, load_optional_node, location)
        when 72 then
-          KeywordRestParameterNode.new(load_location, load_optional_location, start_offset, length)
+          KeywordRestParameterNode.new(load_location, load_optional_location, location)
        when 73 then
-          LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, start_offset, length)
+          LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, location)
        when 74 then
-          LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 75 then
-          LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 76 then
-          LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, start_offset, length)
+          LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, location)
        when 77 then
-          LocalVariableReadNode.new(load_constant, load_varint, start_offset, length)
+          LocalVariableReadNode.new(load_constant, load_varint, location)
        when 78 then
-          LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, start_offset, length)
+          LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, location)
        when 79 then
-          MatchPredicateNode.new(load_node, load_node, load_location, start_offset, length)
+          MatchPredicateNode.new(load_node, load_node, load_location, location)
        when 80 then
-          MatchRequiredNode.new(load_node, load_node, load_location, start_offset, length)
+          MatchRequiredNode.new(load_node, load_node, load_location, location)
        when 81 then
-          MissingNode.new(start_offset, length)
+          MissingNode.new(location)
        when 82 then
-          ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, start_offset, length)
+          ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, location)
        when 83 then
-          MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
+          MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, location)
        when 84 then
-          NextNode.new(load_optional_node, load_location, start_offset, length)
+          NextNode.new(load_optional_node, load_location, location)
        when 85 then
-          NilNode.new(start_offset, length)
+          NilNode.new(location)
        when 86 then
-          NoKeywordsParameterNode.new(load_location, load_location, start_offset, length)
+          NoKeywordsParameterNode.new(load_location, load_location, location)
        when 87 then
-          NumberedReferenceReadNode.new(start_offset, length)
+          NumberedReferenceReadNode.new(location)
        when 88 then
-          OptionalParameterNode.new(load_constant, load_location, load_location, load_node, start_offset, length)
+          OptionalParameterNode.new(load_constant, load_location, load_location, load_node, location)
        when 89 then
-          OrNode.new(load_node, load_node, load_location, start_offset, length)
+          OrNode.new(load_node, load_node, load_location, location)
        when 90 then
-          ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, start_offset, length)
+          ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, location)
        when 91 then
-          ParenthesesNode.new(load_optional_node, load_location, load_location, start_offset, length)
+          ParenthesesNode.new(load_optional_node, load_location, load_location, location)
        when 92 then
-          PinnedExpressionNode.new(load_node, load_location, load_location, load_location, start_offset, length)
+          PinnedExpressionNode.new(load_node, load_location, load_location, load_location, location)
        when 93 then
-          PinnedVariableNode.new(load_node, load_location, start_offset, length)
+          PinnedVariableNode.new(load_node, load_location, location)
        when 94 then
-          PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
+          PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
        when 95 then
-          PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
+          PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
        when 96 then
-          ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, start_offset, length)
+          ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, location)
        when 97 then
-          RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, start_offset, length)
+          RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, location)
        when 98 then
-          RationalNode.new(load_node, start_offset, length)
+          RationalNode.new(load_node, location)
        when 99 then
-          RedoNode.new(start_offset, length)
+          RedoNode.new(location)
        when 100 then
-          RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, start_offset, length)
+          RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, location)
        when 101 then
-          RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, start_offset, length)
+          RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, location)
        when 102 then
-          RequiredParameterNode.new(load_constant, start_offset, length)
+          RequiredParameterNode.new(load_constant, location)
        when 103 then
-          RescueModifierNode.new(load_node, load_location, load_node, start_offset, length)
+          RescueModifierNode.new(load_node, load_location, load_node, location)
        when 104 then
-          RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, start_offset, length)
+          RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, location)
        when 105 then
-          RestParameterNode.new(load_location, load_optional_location, start_offset, length)
+          RestParameterNode.new(load_location, load_optional_location, location)
        when 106 then
-          RetryNode.new(start_offset, length)
+          RetryNode.new(location)
        when 107 then
-          ReturnNode.new(load_location, load_optional_node, start_offset, length)
+          ReturnNode.new(load_location, load_optional_node, location)
        when 108 then
-          SelfNode.new(start_offset, length)
+          SelfNode.new(location)
        when 109 then
-          SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, start_offset, length)
+          SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, location)
        when 110 then
-          SourceEncodingNode.new(start_offset, length)
+          SourceEncodingNode.new(location)
        when 111 then
-          SourceFileNode.new(load_string, start_offset, length)
+          SourceFileNode.new(load_string, location)
        when 112 then
-          SourceLineNode.new(start_offset, length)
+          SourceLineNode.new(location)
        when 113 then
-          SplatNode.new(load_location, load_optional_node, start_offset, length)
+          SplatNode.new(load_location, load_optional_node, location)
        when 114 then
-          StatementsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
+          StatementsNode.new(Array.new(load_varint) { load_node }, location)
        when 115 then
-          StringConcatNode.new(load_node, load_node, start_offset, length)
+          StringConcatNode.new(load_node, load_node, location)
        when 116 then
-          StringNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
+          StringNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
        when 117 then
-          SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, start_offset, length)
+          SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, location)
        when 118 then
-          SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
+          SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
        when 119 then
-          TrueNode.new(start_offset, length)
+          TrueNode.new(location)
        when 120 then
-          UndefNode.new(Array.new(load_varint) { load_node }, load_location, start_offset, length)
+          UndefNode.new(Array.new(load_varint) { load_node }, load_location, location)
        when 121 then
-          UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
+          UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
        when 122 then
-          UntilNode.new(load_location, load_node, load_optional_node, start_offset, length)
+          UntilNode.new(load_location, load_node, load_optional_node, location)
        when 123 then
-          WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, start_offset, length)
+          WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, location)
        when 124 then
-          WhileNode.new(load_location, load_node, load_optional_node, start_offset, length)
+          WhileNode.new(load_location, load_node, load_optional_node, location)
        when 125 then
-          XStringNode.new(load_location, load_location, load_location, load_string, start_offset, length)
+          XStringNode.new(load_location, load_location, load_location, load_string, location)
        when 126 then
-          YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, start_offset, length)
+          YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, location)
        end
      end
    end
--- a/test/yarp/compile_test.rb
+++ b/test/yarp/compile_test.rb
@ -1,212 +0,0 @@
 # frozen_string_literal: true
 require "yarp_test_helper"
 # Checks that YARP.compile and RubyVM::InstructionSequence.compile agree on the
 # parts of the serialized instruction sequence that are compared below, using
 # one or more small snippets per node type.
 class CompileTest < Test::Unit::TestCase
  # Each entry generates a public test method named test_<key> that compiles
  # every snippet listed for that key, in order.
  {
    "AliasNode" => ["alias foo bar"],
    "AndNode" => ["true && false"],
    "ArrayNode" => ["[]", "[foo, bar, baz]"],
    "AssocNode" => ["{ foo: bar }"],
    "BlockNode" => ["foo { bar }"],
    "BlockNode_with_optionals" => ["foo { |x = 1| bar }"],
    "CallNode" => ["foo", "foo(bar)"],
    "ClassVariableReadNode" => ["@@foo"],
    "ClassVariableWriteNode" => ["@@foo = 1"],
    "FalseNode" => ["false"],
    "GlobalVariableReadNode" => ["$foo"],
    "GlobalVariableWriteNode" => ["$foo = 1"],
    "HashNode" => ["{ foo: bar }"],
    "InstanceVariableReadNode" => ["@foo"],
    "InstanceVariableWriteNode" => ["@foo = 1"],
    "IntegerNode" => ["1", "1_000"],
    "InterpolatedStringNode" => ["\"foo \#{bar} baz\""],
    "LocalVariableWriteNode" => ["foo = 1"],
    "LocalVariableReadNode" => ["[foo = 1, foo]"],
    "NilNode" => ["nil"],
    "OrNode" => ["true || false"],
    "ParenthesesNode" => ["()"],
    "ProgramNode" => [""],
    "RangeNode" => ["foo..bar", "foo...bar", "(foo..)", "(foo...)", "(..bar)", "(...bar)"],
    "SelfNode" => ["self"],
    "StringNode" => ["\"foo\""],
    "SymbolNode" => [":foo"],
    "TrueNode" => ["true"],
    "UndefNode" => ["undef :foo, :bar, :baz"],
    "XStringNode" => ["`foo`"]
  }.each do |suffix, snippets|
    define_method(:"test_#{suffix}") do
      snippets.each { |snippet| assert_compiles(snippet) }
    end
  end

  private

  # Compiles the source with both compilers and compares the results.
  def assert_compiles(source)
    reference = rubyvm_compile(source)
    candidate = YARP.compile(source)
    assert_equal_iseqs(reference, candidate)
  end

  # Compares two serialized instruction sequences (arrays; indices 0 through 13
  # are inspected here). Not every field is supported yet, so the arrays are
  # not compared wholesale: supported fields are compared for equality, the
  # rest are only checked for their type. Note that the instruction list in
  # `actual` is consumed (shifted) while it is being compared.
  def assert_equal_iseqs(expected, actual)
    # Index 0: the magic comment string.
    assert_equal expected[0], actual[0]

    # Indices 1..3: the major, minor, and patch version numbers.
    # TODO: Insert this check once Ruby 3.3 is released, and the TruffleRuby
    # GitHub workflow also checks against Ruby 3.3
    # assert_equal expected[1...4], actual[1...4]

    # Index 4: the iseq options. Some of these are supported and compared,
    # others are only checked for shape.
    expected_options = expected[4]
    actual_options = actual[4]
    assert_equal expected_options[:arg_size], actual_options[:arg_size], "Unexpected difference in arg_size"
    assert_equal expected_options[:stack_max], actual_options[:stack_max], "Unexpected difference in stack_max"
    assert_kind_of Integer, actual_options[:local_size]
    assert_kind_of Integer, actual_options[:node_id]
    assert_equal expected_options[:code_location].length, actual_options[:code_location].length, "Unexpected difference in code_location length"
    assert_equal expected_options[:node_ids].length, actual_options[:node_ids].length, "Unexpected difference in node_ids length"

    # Indices 5..8: iseq name, relative path, absolute path, and line number.
    # These are not working quite yet, so only their types are checked.
    [5, 6, 7].each { |index| assert_kind_of String, actual[index] }
    assert_kind_of Integer, actual[8]

    # Index 9: the type of the iseq.
    assert_equal expected[9], actual[9]

    # Index 10: the list of local variables (not supported yet).
    assert_kind_of Array, actual[10]

    # Index 11: the argument options, used by block and method iseqs to
    # reflect how the arguments are passed.
    assert_equal expected[11], actual[11], "Unexpected difference in argument options"

    # Index 12: the catch table entries (not working yet).
    assert_kind_of Array, actual[12]

    # Index 13: the instructions themselves. Line numbers and some tracepoint
    # events in the expected list are skipped without consuming anything from
    # the actual list.
    remaining = actual[13]
    expected[13].each do |insn|
      case insn
      in [:send, opnds, expected_block] unless expected_block.nil?
        # A send with a block: the operands must match and the nested block
        # iseqs are compared recursively.
        remaining.shift => [:send, ^(opnds), actual_block]
        assert_equal_iseqs expected_block, actual_block
      in Array | :RUBY_EVENT_B_CALL | :RUBY_EVENT_B_RETURN | /^label_\d+/
        assert_equal insn, remaining.shift
      in Integer | /^RUBY_EVENT_/
        # skip these for now
      else
        flunk "Unexpected instruction: #{insn.inspect}"
      end
    end
  end

  # Compiles the source with RubyVM::InstructionSequence, with the listed
  # compile options switched off, and returns the iseq in its array form.
  def rubyvm_compile(source)
    iseq = RubyVM::InstructionSequence.compile(
      source,
      peephole_optimization: false,
      specialized_instruction: false,
      operands_unification: false,
      instructions_unification: false,
      frozen_string_literal: false
    )
    iseq.to_a
  end
 end
--- a/test/yarp/encoding_test.rb
+++ b/test/yarp/encoding_test.rb
@ -55,6 +55,21 @@ class EncodingTest < Test::Unit::TestCase
    assert_equal Encoding.find("utf-8"), actual
  end
  # Parses a Shift_JIS-encoded %w list whose single element is the two bytes
  # 0x81 0x5c and checks that both bytes come back as one unescaped element.
  # The second byte has the same value as an ASCII backslash, so this
  # presumably guards against a byte-wise scan mistaking it for an escape;
  # our strpbrk is expected to take the encoding of the file into account.
  def test_strpbrk_multibyte
    source = <<~RUBY
      # encoding: Shift_JIS
      %w[\x81\x5c]
    RUBY
    parsed = YARP.parse(source)
    assert(parsed.errors.empty?)

    expected = (+"\x81\x5c").force_encoding(Encoding::Shift_JIS)
    first_element = parsed.value.statements.body.first.elements.first
    assert_equal(expected, first_element.unescaped)
  end
  def test_utf_8_variations
    %w[
      utf-8-unix
--- a/test/yarp/fixtures/not.txt
+++ b/test/yarp/fixtures/not.txt
@ -18,3 +18,16 @@ not foo and
  bar
 not(foo
 )
 not(
 foo
  )
--- a/test/yarp/fixtures/patterns.txt
+++ b/test/yarp/fixtures/patterns.txt
@ -76,6 +76,7 @@ foo => Foo(*bar, baz, *qux)
 foo => Foo[]
 foo => Foo[1]
 foo => Foo[1, 2, 3]
 foo => Foo[Foo[]]
 foo => Foo[bar]
 foo => Foo[*bar, baz]
 foo => Foo[bar, *baz]
--- a/test/yarp/language_server_test.rb
+++ b/test/yarp/language_server_test.rb
@ -1,363 +0,0 @@
 # frozen_string_literal: true
 require_relative "yarp_test_helper"
 require "yarp/language_server"
 module YARP
  class LanguageServerTest < Test::Unit::TestCase
    # Small structural patterns used to match decoded responses. Build one with
    # Request[...]: arrays become Tuple patterns, hashes become Shape patterns,
    # and any other value is used as-is and compared with ===.
    module Request
      # Represents a hash pattern. Every key in the pattern must match the
      # corresponding entry of the candidate; keys that only appear in the
      # candidate are ignored. The special value :any only requires the key to
      # be present.
      class Shape
        attr_reader :values

        def initialize(values)
          @values = values
        end

        # Returns true when `other` satisfies this pattern. A candidate that is
        # not a Hash is a mismatch rather than an error, so a malformed
        # response produces an assertion failure instead of a NoMethodError.
        def ===(other)
          # An empty shape places no constraints on the candidate.
          return true if values.empty?
          return false unless other.is_a?(Hash)

          values.all? do |key, value|
            value == :any ? other.key?(key) : value === other[key]
          end
        end
      end

      # Represents an array pattern. Elements are matched by position; elements
      # of the candidate beyond the length of the pattern are ignored.
      class Tuple
        attr_reader :values

        def initialize(values)
          @values = values
        end

        # Returns true when `other` satisfies this pattern. A candidate that is
        # not an Array is a mismatch rather than an error.
        def ===(other)
          # An empty tuple places no constraints on the candidate.
          return true if values.empty?
          return false unless other.is_a?(Array)

          values.each_with_index.all? { |value, index| value === other[index] }
        end
      end

      # Recursively converts a plain Ruby structure into a pattern.
      def self.[](value)
        case value
        when Array
          Tuple.new(value.map { |child| self[child] })
        when Hash
          Shape.new(value.transform_values { |child| self[child] })
        else
          value
        end
      end
    end
    # An "initialize" request carrying the given id. Defined by assigning the
    # Struct directly instead of subclassing an anonymous Struct class.
    Initialize = Struct.new(:id) do
      # The request as a hash, ready to be serialized.
      def to_hash
        { method: "initialize", id: id }
      end
    end
    # A "shutdown" request carrying the given id. Defined by assigning the
    # Struct directly instead of subclassing an anonymous Struct class.
    Shutdown = Struct.new(:id) do
      # The request as a hash, ready to be serialized.
      def to_hash
        { method: "shutdown", id: id }
      end
    end
    # A "textDocument/didOpen" message for the document at `uri` with the given
    # `text`. It carries no id. Defined by assigning the Struct directly
    # instead of subclassing an anonymous Struct class.
    TextDocumentDidOpen = Struct.new(:uri, :text) do
      # The message as a hash, ready to be serialized.
      def to_hash
        {
          method: "textDocument/didOpen",
          params: { textDocument: { uri: uri, text: text } }
        }
      end
    end
    # A "textDocument/didChange" message for the document at `uri`, with `text`
    # as its single content change. It carries no id. Defined by assigning the
    # Struct directly instead of subclassing an anonymous Struct class.
    TextDocumentDidChange = Struct.new(:uri, :text) do
      # The message as a hash, ready to be serialized.
      def to_hash
        {
          method: "textDocument/didChange",
          params: {
            textDocument: { uri: uri },
            contentChanges: [{ text: text }]
          }
        }
      end
    end
    # A "textDocument/didClose" message for the document at `uri`. It carries
    # no id. Defined by assigning the Struct directly instead of subclassing an
    # anonymous Struct class.
    TextDocumentDidClose = Struct.new(:uri) do
      # The message as a hash, ready to be serialized.
      def to_hash
        {
          method: "textDocument/didClose",
          params: { textDocument: { uri: uri } }
        }
      end
    end
    # A "textDocument/codeAction" request with the given id, for the document
    # at `uri`, passing `diagnostics` as the request context. Defined by
    # assigning the Struct directly instead of subclassing an anonymous Struct
    # class.
    TextDocumentCodeAction = Struct.new(:id, :uri, :diagnostics) do
      # The request as a hash, ready to be serialized.
      def to_hash
        {
          method: "textDocument/codeAction",
          id: id,
          params: {
            textDocument: { uri: uri },
            context: {
              diagnostics: diagnostics,
            },
          },
        }
      end
    end
    # A "textDocument/diagnostic" request with the given id for the document at
    # `uri`. Defined by assigning the Struct directly instead of subclassing an
    # anonymous Struct class.
    TextDocumentDiagnostic = Struct.new(:id, :uri) do
      # The request as a hash, ready to be serialized.
      def to_hash
        {
          method: "textDocument/diagnostic",
          id: id,
          params: {
            textDocument: { uri: uri },
          }
        }
      end
    end
    # Runs an initialize/shutdown exchange while a temporary Ruby file exists
    # on disk and checks the two responses.
    # NOTE(review): the temporary file is written but never referenced by any
    # request sent to the server - confirm whether that is intentional.
    def test_reading_file
      Tempfile.create(%w[test- .rb]) do |tempfile|
        tempfile.write("class Foo; end")
        tempfile.rewind

        requests = [Initialize.new(1), Shutdown.new(3)]
        responses = run_server(requests)

        expected = Request[[
          { id: 1, result: { capabilities: Hash } },
          { id: 3, result: {} }
        ]]
        assert_operator(expected, :===, responses)
      end
    end
    # An initialize request followed by a shutdown request should each get a
    # response carrying the matching id.
    def test_clean_shutdown
      requests = [Initialize.new(1), Shutdown.new(2)]
      responses = run_server(requests)

      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 2, result: {} }
      ]]
      assert_operator(expected, :===, responses)
    end
    # Runs an initialize/shutdown exchange and checks the two responses.
    # NOTE(review): despite the name, no request here refers to a file -
    # confirm what this test is meant to cover.
    def test_file_that_does_not_exist
      requests = [Initialize.new(1), Shutdown.new(3)]
      responses = run_server(requests)

      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 3, result: {} }
      ]]
      assert_operator(expected, :===, responses)
    end
    # Requests code actions for an open document together with one diagnostic
    # and expects a single quickfix that reports the error, whose command
    # argument contains the URL-encoded diagnostic message.
    def test_code_action_request
      uri = "file:///path/to/file.rb"
      message = "this is an error"
      diagnostic = {
        range: {
          start: { line: 0, character: 0 },
          end: { line: 0, character: 0 }
        },
        message: message,
        severity: 1,
      }
      source = <<~RUBY
        1 + (
      RUBY

      responses = run_server([
        Initialize.new(1),
        TextDocumentDidOpen.new(uri, source),
        TextDocumentCodeAction.new(2, uri, [diagnostic]),
        Shutdown.new(3)
      ])

      quickfix = {
        title: "Report incorrect error: `#{message}`",
        kind: "quickfix",
        diagnostics: [diagnostic],
        command: {
          title: "Report incorrect error",
          command: "vscode.open",
          arguments: [String]
        }
      }
      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 2, result: [quickfix] },
        { id: 3, result: {} }
      ]]
      assert_operator(expected, :===, responses)

      # The first command argument must mention the diagnostic message.
      report_argument = responses.dig(1, :result, 0, :command, :arguments, 0)
      encoded_message = URI.encode_www_form_component(message)
      assert(report_argument.include?(encoded_message))
    end
    # Requests code actions for an open document without passing any
    # diagnostics and checks the three responses.
    def test_code_action_request_no_diagnostic
      uri = "file:///path/to/file.rb"
      source = <<~RUBY
        1 + (
      RUBY

      responses = run_server([
        Initialize.new(1),
        TextDocumentDidOpen.new(uri, source),
        TextDocumentCodeAction.new(2, uri, []),
        Shutdown.new(3)
      ])

      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 2, result: [] },
        { id: 3, result: {} }
      ]]
      assert_operator(expected, :===, responses)
    end
    # Requests code actions for a document that was never opened and expects
    # the result of that request to be nil.
    def test_code_action_request_no_content
      message = "this is an error"
      diagnostic = {
        range: {
          start: { line: 0, character: 0 },
          end: { line: 0, character: 0 }
        },
        message: message,
        severity: 1,
      }

      requests = [
        Initialize.new(1),
        TextDocumentCodeAction.new(2, "file:///path/to/file.rb", [diagnostic]),
        Shutdown.new(3)
      ]
      responses = run_server(requests)

      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 2, result: nil },
        { id: 3, result: {} }
      ]]
      assert_operator(expected, :===, responses)
    end
    # Requests diagnostics for an open document containing `1 + (` and expects
    # a full report with at least one item of severity 1.
    def test_diagnostics_request_error
      uri = "file:///path/to/file.rb"
      source = <<~RUBY
        1 + (
      RUBY

      responses = run_server([
        Initialize.new(1),
        TextDocumentDidOpen.new(uri, source),
        TextDocumentDiagnostic.new(2, uri),
        Shutdown.new(3)
      ])

      item = {
        range: {
          start: { line: Integer, character: Integer },
          end: { line: Integer, character: Integer }
        },
        message: String,
        severity: Integer
      }
      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 2, result: { kind: "full", items: [item] } },
        { id: 3, result: {} }
      ]]
      assert_operator(expected, :===, responses)

      items = responses.dig(1, :result, :items)
      assert(items.any? { |entry| entry[:severity] == 1 })
    end
    # Requests diagnostics for an open document containing `a/b /c` and expects
    # a full report with at least one item of severity 2.
    def test_diagnostics_request_warning
      uri = "file:///path/to/file.rb"
      source = <<~RUBY
        a/b /c
      RUBY

      responses = run_server([
        Initialize.new(1),
        TextDocumentDidOpen.new(uri, source),
        TextDocumentDiagnostic.new(2, uri),
        Shutdown.new(3)
      ])

      item = {
        range: {
          start: { line: Integer, character: Integer },
          end: { line: Integer, character: Integer }
        },
        message: String,
        severity: Integer
      }
      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 2, result: { kind: "full", items: [item] } },
        { id: 3, result: {} }
      ]]
      assert_operator(expected, :===, responses)

      items = responses.dig(1, :result, :items)
      assert(items.any? { |entry| entry[:severity] == 2 })
    end
    # Requests diagnostics for an open document containing `a = 1` and expects
    # a full report with no items.
    def test_diagnostics_request_nothing
      uri = "file:///path/to/file.rb"
      source = <<~RUBY
        a = 1
      RUBY

      responses = run_server([
        Initialize.new(1),
        TextDocumentDidOpen.new(uri, source),
        TextDocumentDiagnostic.new(2, uri),
        Shutdown.new(3)
      ])

      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 2, result: { kind: "full", items: [] } },
        { id: 3, result: {} }
      ]]
      assert_operator(expected, :===, responses)

      items = responses.dig(1, :result, :items)
      assert_equal(0, items.size)
    end
    # Requests diagnostics for a document that was never opened and expects the
    # result of that request to be nil.
    def test_diagnostics_request_no_content
      requests = [
        Initialize.new(1),
        TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
        Shutdown.new(3)
      ]
      responses = run_server(requests)

      expected = Request[[
        { id: 1, result: { capabilities: Hash } },
        { id: 2, result: nil },
        { id: 3, result: {} }
      ]]
      assert_operator(expected, :===, responses)
    end
    private
    # Serializes a message (anything responding to to_hash) as JSON with the
    # "jsonrpc" version added, and frames it with a Content-Length header that
    # gives the size of the JSON body in bytes.
    def write(content)
      message = content.to_hash.merge(jsonrpc: "2.0")
      body = message.to_json
      "Content-Length: #{body.bytesize}\r\n\r\n#{body}"
    end
    # Reads every framed message from the given IO: each header block ends with
    # a blank line and gives the number of bytes of JSON that follow. Returns
    # the parsed messages, with symbolized keys, in the order they were read.
    def read(content)
      messages = []
      until (headers = content.gets("\r\n\r\n")).nil?
        length = headers[/Content-Length: (\d+)/i, 1].to_i
        body = content.read(length)
        messages << JSON.parse(body, symbolize_names: true)
      end
      messages
    end
    # Feeds the framed messages to a LanguageServer through in-memory IO
    # objects, runs it, and returns everything it wrote, parsed into hashes.
    def run_server(messages)
      framed = messages.map { |message| write(message) }.join
      input = StringIO.new(framed)
      output = StringIO.new

      server = LanguageServer.new(input: input, output: output)
      server.run

      # Rewind so the responses can be read back from the start.
      output.rewind
      read(output)
    end
  end
 end
--- a/test/yarp/parse_test.rb
+++ b/test/yarp/parse_test.rb
@ -3,10 +3,12 @@
 require "yarp_test_helper"
 class ParseTest < Test::Unit::TestCase
-  # Because we're reading the snapshots from disk, we need to make sure that
+  # When we pretty-print the trees to compare against the snapshots, we want to
-  # they're encoded as UTF-8. When certain settings are present this might not
+  # be certain that we print with the same external encoding. This is because
-  # always be the case (e.g., LANG=C or -Eascii-8bit). So here we force the
+  # methods like Symbol#inspect take into account external encoding and it could
-  # default external encoding for the duration of the test.
+  # change how the snapshot is generated. On machines with certain settings
  # (like LANG=C or -Eascii-8bit) this could have been changed. So here we're
  # going to force it to be UTF-8 to keep the snapshots consistent.
  def setup
    @previous_default_external = Encoding.default_external
    ignore_warnings { Encoding.default_external = Encoding::UTF_8 }
@ -29,20 +31,6 @@ class ParseTest < Test::Unit::TestCase
    seattlerb/pct_w_heredoc_interp_nested.txt
  ]
  # The filepath stored in a SourceFileNode differs from one machine to the
  # next, so PP.pp(sexp, +"", 79) can give different results: both the path
  # itself and the line breaks that depend on its length. This collapses every
  # printed SourceFileNode onto one line and then removes the directory of this
  # file from the output.
  def normalize_printed(printed)
    source_file_node =
      /SourceFileNode \s*
          \(\s* (\d+\.\.\.\d+) \s*\) \s*
          \(\s* ("[^"]*")      \s*\)
        /mx
    collapsed = printed.gsub(source_file_node, 'SourceFileNode(\1)(\2)')
    collapsed.gsub(__dir__, "")
  end
  def find_source_file_node(node)
    if node.is_a?(YARP::SourceFileNode)
      node
@ -79,27 +67,26 @@ class ParseTest < Test::Unit::TestCase
      # that is invalid Ruby.
      refute_nil Ripper.sexp_raw(source)
      # Next, parse the source and print the value.
      result = YARP.parse_file(filepath)
      value = result.value
      printed = normalize_printed(PP.pp(value, +"", 79))
      # Next, assert that there were no errors during parsing.
-      assert_empty result.errors, value
+      result = YARP.parse(source, relative)
      assert_empty result.errors
      # Next, pretty print the source.
      printed = PP.pp(result.value, +"", 79)
      if File.exist?(snapshot)
-        normalized = normalize_printed(File.read(snapshot))
+        saved = File.read(snapshot)
        # If the snapshot file exists, but the printed value does not match the
        # snapshot, then update the snapshot file.
-        if normalized != printed
+        if printed != saved
-          File.write(snapshot, normalized)
+          File.write(snapshot, printed)
          warn("Updated snapshot at #{snapshot}.")
        end
        # If the snapshot file exists, then assert that the printed value
        # matches the snapshot.
-        assert_equal(normalized, printed)
+        assert_equal(saved, printed)
      else
        # If the snapshot file does not yet exist, then write it out now.
        File.write(snapshot, printed)
@ -108,11 +95,11 @@ class ParseTest < Test::Unit::TestCase
      # Next, assert that the value can be serialized and deserialized without
      # changing the shape of the tree.
-      assert_equal_nodes(value, YARP.load(source, YARP.dump(source, filepath)))
+      assert_equal_nodes(result.value, YARP.load(source, YARP.dump(source, relative)))
      # Next, assert that the newlines are in the expected places.
      expected_newlines = [0]
-      source.b.scan("\n") { expected_newlines << $~.offset(0)[0] }
+      source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
      assert_equal expected_newlines, YARP.newlines(source)
      # Finally, assert that we can lex the source and get the same tokens as
--- a/test/yarp/regexp_test.rb
+++ b/test/yarp/regexp_test.rb
@ -101,6 +101,10 @@ class RegexpTest < Test::Unit::TestCase
    refute_nil(YARP.named_captures("(?#foo)"))
  end
  # A comment group whose text contains escaped closing parentheses, here the
  # regexp source `(?#foo\)\))`, must still yield a non-nil result from
  # YARP.named_captures.
  def test_comments_with_escaped_parentheses
    refute_nil(YARP.named_captures("(?#foo\\)\\))"))
  end
  # A non-capturing group, `(?:foo)`, must yield a non-nil result from
  # YARP.named_captures.
  def test_non_capturing_groups
    refute_nil(YARP.named_captures("(?:foo)"))
  end
--- a/test/yarp/snapshots/keyword_method_names.txt
+++ b/test/yarp/snapshots/keyword_method_names.txt
@ -102,7 +102,7 @@ ProgramNode(0...185)(
     StringNode(123...129)((123...125), (125...128), (128...129), "abc"),
     DefNode(131...149)(
       (144...145),
-       SourceFileNode(135...143)("/fixtures/keyword_method_names.txt"),
+       SourceFileNode(135...143)("keyword_method_names.txt"),
       nil,
       nil,
       [],
--- a/test/yarp/snapshots/keywords.txt
+++ b/test/yarp/snapshots/keywords.txt
@ -5,7 +5,7 @@ ProgramNode(0...51)(
     RetryNode(6...11)(),
     SelfNode(13...17)(),
     SourceEncodingNode(19...31)(),
-     SourceFileNode(33...41)("/fixtures/keywords.txt"),
+     SourceFileNode(33...41)("keywords.txt"),
     SourceLineNode(43...51)()]
  )
 )
--- a/test/yarp/snapshots/not.txt
+++ b/test/yarp/snapshots/not.txt
@ -1,6 +1,6 @@
-ProgramNode(0...125)(
+ProgramNode(0...156)(
  [],
-  StatementsNode(0...125)(
+  StatementsNode(0...156)(
    [AndNode(0...19)(
       CallNode(0...7)(
         CallNode(4...7)(nil, nil, (4...7), nil, nil, nil, nil, 0, "foo"),
@ -146,6 +146,48 @@ ProgramNode(0...125)(
         "!"
       ),
       (108...111)
     ),
     CallNode(127...138)(
       CallNode(131...134)(
         nil,
         nil,
         (131...134),
         nil,
         nil,
         nil,
         nil,
         0,
         "foo"
       ),
       nil,
       (127...130),
       (130...131),
       nil,
       (137...138),
       nil,
       0,
       "!"
     ),
     CallNode(140...156)(
       CallNode(147...150)(
         nil,
         nil,
         (147...150),
         nil,
         nil,
         nil,
         nil,
         0,
         "foo"
       ),
       nil,
       (140...143),
       (143...144),
       nil,
       (155...156),
       nil,
       0,
       "!"
     )]
  )
 )
--- a/test/yarp/snapshots/patterns.txt
+++ b/test/yarp/snapshots/patterns.txt
--- a/test/yarp/snapshots/unparser/corpus/literal/pragma.txt
+++ b/test/yarp/snapshots/unparser/corpus/literal/pragma.txt
@ -2,7 +2,7 @@ ProgramNode(0...38)(
  [],
  StatementsNode(0...38)(
    [SourceEncodingNode(0...12)(),
-     SourceFileNode(13...21)("/fixtures/unparser/corpus/literal/pragma.txt"),
+     SourceFileNode(13...21)("unparser/corpus/literal/pragma.txt"),
     SourceLineNode(22...30)(),
     CallNode(31...38)(nil, nil, (31...38), nil, nil, nil, nil, 0, "__dir__")]
  )
--- a/test/yarp/snapshots/whitequark/pattern_matchingFILELINE_literals.txt
+++ b/test/yarp/snapshots/whitequark/pattern_matchingFILELINE_literals.txt
@ -3,7 +3,9 @@ ProgramNode(8...111)(
  StatementsNode(8...111)(
    [CaseNode(8...111)(
       ArrayNode(13...51)(
-         [SourceFileNode(14...22)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"),
+         [SourceFileNode(14...22)(
            "whitequark/pattern_matching__FILE__LINE_literals.txt"
          ),
          CallNode(24...36)(
            SourceLineNode(24...32)(),
            nil,
@ -22,7 +24,9 @@ ProgramNode(8...111)(
       [InNode(62...99)(
          ArrayPatternNode(65...99)(
            nil,
-            [SourceFileNode(66...74)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"),
+            [SourceFileNode(66...74)(
               "whitequark/pattern_matching__FILE__LINE_literals.txt"
             ),
             SourceLineNode(76...84)(),
             SourceEncodingNode(86...98)()],
            nil,
--- a/test/yarp/snapshots/whitequark/string_FILE.txt
+++ b/test/yarp/snapshots/whitequark/string_FILE.txt
@ -1,6 +1,6 @@
 ProgramNode(0...8)(
  [],
  StatementsNode(0...8)(
-    [SourceFileNode(0...8)("/fixtures/whitequark/string___FILE__.txt")]
+    [SourceFileNode(0...8)("whitequark/string___FILE__.txt")]
  )
 )
--- a/yarp/api_node.c
+++ b/yarp/api_node.c
--- a/yarp/ast.h
+++ b/yarp/ast.h
@ -9,14 +9,13 @@
 #define YARP_AST_H
 #include "yarp/defines.h"
 #include "yarp/util/yp_constant_pool.h"
 #include "yarp/util/yp_string.h"
 #include <assert.h>
 #include <stddef.h>
 #include <stdint.h>
 #include "yarp/util/yp_constant_pool.h"
 #include "yarp/util/yp_string.h"
 // This enum represents every type of token in the Ruby source.
 typedef enum yp_token_type {
    YP_TOKEN_EOF = 1, // final token in the file
--- a/yarp/compile.c
+++ b/yarp/compile.c
@ -1,826 +0,0 @@
 #include "yarp/extension.h"
 // The kinds of instruction sequence this compiler can produce. yp_iseq_new
 // reports YP_ISEQ_TYPE_TOP as the symbol :top and YP_ISEQ_TYPE_BLOCK as the
 // symbol :block.
 typedef enum {
    YP_ISEQ_TYPE_TOP,
    YP_ISEQ_TYPE_BLOCK
 } yp_iseq_type_t;
 // The trace events that push_ruby_event can append to an instruction list.
 // Each one is emitted as a symbol spelled like the enum member without its
 // YP_ prefix (for example :RUBY_EVENT_B_CALL).
 typedef enum {
    YP_RUBY_EVENT_B_CALL,
    YP_RUBY_EVENT_B_RETURN
 } yp_ruby_event_t;
 // The state carried while compiling a single instruction sequence. One of
 // these is created per ISEQ (the top-level program and each block) and is
 // populated by yp_iseq_compiler_init.
 typedef struct yp_iseq_compiler {
    // This is the parent compiler. It is used to communicate between ISEQs that
    // need to be able to jump back to the parent ISEQ. It is NULL for the
    // top-level compiler.
    struct yp_iseq_compiler *parent;
    // This is the list of local variables that are defined on this scope.
    yp_constant_id_list_t *locals;
    // This is the instruction sequence that we are compiling. It's actually just
    // a Ruby array that maps to the output of RubyVM::InstructionSequence#to_a.
    VALUE insns;
    // This is a list of IDs coming from the instructions that are being compiled.
    // In theory they should be deterministic, but we don't have that
    // functionality yet. Fortunately you can pass -1 for all of them and
    // everything for the most part continues to work.
    VALUE node_ids;
    // This is the current size of the instruction sequence's stack. It is
    // adjusted by push_insn every time an instruction is appended.
    int stack_size;
    // This is the maximum size of the instruction sequence's stack, i.e. the
    // largest value stack_size has reached so far.
    int stack_max;
    // This is the name of the instruction sequence.
    const char *name;
    // This is the type of the instruction sequence.
    yp_iseq_type_t type;
    // This is the optional argument information. It is a Ruby hash; the block
    // compilation stores the list of optional-parameter labels under :opt.
    VALUE optionals;
    // This is the number of arguments.
    int arg_size;
    // This is the current size of the instruction sequence's instructions and
    // operands. push_label uses it to build label names.
    size_t size;
    // This is the index of the current inline storage. It is handed out and
    // incremented by yp_inline_storage_new.
    size_t inline_storage_index;
 } yp_iseq_compiler_t;
 // Put the given compiler into its initial state: remember the parent, locals,
 // name and type that were passed in, allocate fresh Ruby containers for the
 // instructions, node ids and optional-argument info, and zero every counter.
 static void
 yp_iseq_compiler_init(yp_iseq_compiler_t *compiler, yp_iseq_compiler_t *parent, yp_constant_id_list_t *locals, const char *name, yp_iseq_type_t type) {
    // Values supplied by the caller.
    compiler->parent = parent;
    compiler->locals = locals;
    compiler->name = name;
    compiler->type = type;

    // Freshly allocated Ruby objects owned by this compiler.
    compiler->insns = rb_ary_new();
    compiler->node_ids = rb_ary_new();
    compiler->optionals = rb_hash_new();

    // Counters all start from zero.
    compiler->stack_size = 0;
    compiler->stack_max = 0;
    compiler->arg_size = 0;
    compiler->size = 0;
    compiler->inline_storage_index = 0;
 }
 /******************************************************************************/
 /* Utilities                                                                  */
 /******************************************************************************/
 // Narrow a size_t to an int, raising a Ruby RuntimeError instead of silently
 // truncating when the value does not fit.
 static inline int
 sizet2int(size_t value) {
    if (value > (size_t) INT_MAX) {
        rb_raise(rb_eRuntimeError, "value too large");
    }

    return (int) value;
 }
 // Find the slot of the local identified by constant_id in the scope that is
 // `depth` parents above the given compiler. Returns the computed index
 // (number of locals - position + 2), or -1 if that scope does not declare the
 // local.
 static int
 local_index(yp_iseq_compiler_t *compiler, yp_constant_id_t constant_id, int depth) {
    // Walk up the chain of parent compilers to reach the requested scope.
    yp_iseq_compiler_t *scope = compiler;
    for (int hops = depth; hops > 0; hops--) {
        scope = scope->parent;
        assert(scope != NULL);
    }

    // Scan that scope's locals for the requested id.
    yp_constant_id_list_t *locals = scope->locals;
    for (size_t position = 0; position < locals->size; position++) {
        if (locals->ids[position] != constant_id) continue;
        return sizet2int(locals->size - position + 2);
    }

    return -1;
 }
 /******************************************************************************/
 /* Parse specific VALUEs from strings                                         */
 /******************************************************************************/
 // Convert the source text in [start, end) into a Ruby Integer.
 //
 // The text is copied into a Ruby string (heap allocated and garbage
 // collected) rather than an alloca'd buffer, so that an arbitrarily long
 // literal cannot overflow the C stack. The base of -10 and the disabled
 // badcheck flag are the same arguments the previous rb_cstr_to_inum call used.
 static VALUE
 parse_number(const char *start, const char *end) {
    VALUE text = rb_str_new(start, end - start);
    return rb_str_to_inum(text, -10, Qfalse);
 }
 // Build a new Ruby String from the bytes held by the given yp_string_t.
 static inline VALUE
 parse_string(yp_string_t *string) {
    const char *source = yp_string_source(string);
    size_t length = yp_string_length(string);

    return rb_str_new(source, length);
 }
 // Intern the bytes in [start, end) and return the resulting ID.
 static inline ID
 parse_symbol(const char *start, const char *end) {
    const ptrdiff_t length = end - start;
    return rb_intern2(start, length);
 }
 // Intern the source text covered by the given location and return its ID.
 static inline ID
 parse_location_symbol(yp_location_t *location) {
    const char *start = location->start;
    return rb_intern2(start, location->end - start);
 }
 // Intern the source text covered by the given node's location and return its
 // ID.
 static inline ID
 parse_node_symbol(yp_node_t *node) {
    const char *start = node->location.start;
    return rb_intern2(start, node->location.end - start);
 }
 // Intern the bytes held by the given yp_string_t and return the resulting ID.
 static inline ID
 parse_string_symbol(yp_string_t *string) {
    const char *source = yp_string_source(string);
    const char *finish = source + yp_string_length(string);

    return rb_intern2(source, finish - source);
 }
 /******************************************************************************/
 /* Create Ruby objects for compilation                                        */
 /******************************************************************************/
 // Build the Ruby array describing the instruction sequence accumulated in the
 // given compiler. The array has 14 elements; per the comment on the compiler's
 // insns field it is meant to match the output of
 // RubyVM::InstructionSequence#to_a.
 //
 // The line/column information is currently hard-coded (line 1, column 0) and
 // local_size/node_id are placeholders, because the compiler does not track
 // them yet.
 static VALUE
 yp_iseq_new(yp_iseq_compiler_t *compiler) {
    // [start line, start column, end line, end column]
    VALUE code_location = rb_ary_new_capa(4);
    rb_ary_push(code_location, INT2FIX(1));
    rb_ary_push(code_location, INT2FIX(0));
    rb_ary_push(code_location, INT2FIX(1));
    rb_ary_push(code_location, INT2FIX(0));

    // Miscellaneous metadata hash.
    VALUE data = rb_hash_new();
    rb_hash_aset(data, ID2SYM(rb_intern("arg_size")), INT2FIX(compiler->arg_size));
    rb_hash_aset(data, ID2SYM(rb_intern("local_size")), INT2FIX(0));
    rb_hash_aset(data, ID2SYM(rb_intern("stack_max")), INT2FIX(compiler->stack_max));
    rb_hash_aset(data, ID2SYM(rb_intern("node_id")), INT2FIX(-1));
    rb_hash_aset(data, ID2SYM(rb_intern("code_location")), code_location);
    rb_hash_aset(data, ID2SYM(rb_intern("node_ids")), compiler->node_ids);

    VALUE type = Qnil;
    switch (compiler->type) {
        case YP_ISEQ_TYPE_TOP:
            type = ID2SYM(rb_intern("top"));
            break;
        case YP_ISEQ_TYPE_BLOCK:
            type = ID2SYM(rb_intern("block"));
            break;
    }

    // 14 elements are pushed below, so reserve room for all of them up front
    // (the capacity used to be 13, one short of what is actually stored).
    VALUE iseq = rb_ary_new_capa(14);
    rb_ary_push(iseq, rb_str_new_cstr("YARVInstructionSequence/SimpleDataFormat")); // magic
    rb_ary_push(iseq, INT2FIX(3));                          // major version
    rb_ary_push(iseq, INT2FIX(3));                          // minor version
    rb_ary_push(iseq, INT2FIX(1));                          // format type
    rb_ary_push(iseq, data);                                // misc metadata
    rb_ary_push(iseq, rb_str_new_cstr(compiler->name));     // label
    rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));       // path
    rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));       // absolute path
    rb_ary_push(iseq, INT2FIX(1));                          // first line number
    rb_ary_push(iseq, type);                                // iseq type
    rb_ary_push(iseq, rb_ary_new());                        // locals
    rb_ary_push(iseq, compiler->optionals);                 // parameter info
    rb_ary_push(iseq, rb_ary_new());                        // catch table
    rb_ary_push(iseq, compiler->insns);                     // instructions

    return iseq;
 }
 // Bit flags stored under the :flag key of a calldata hash (see
 // yp_calldata_new). Only the flags that the compiler currently emits are
 // defined; the rest are left commented out to document their bit positions.
 // NOTE(review): the positions appear to mirror CRuby's VM_CALL_* bits; confirm
 // against vm_callinfo.h before enabling more of them.
 // static const int YP_CALLDATA_ARGS_SPLAT = 1 << 0;
 // static const int YP_CALLDATA_ARGS_BLOCKARG = 1 << 1;
 static const int YP_CALLDATA_FCALL = 1 << 2;
 static const int YP_CALLDATA_VCALL = 1 << 3;
 static const int YP_CALLDATA_ARGS_SIMPLE = 1 << 4;
 // static const int YP_CALLDATA_BLOCKISEQ = 1 << 5;
 // static const int YP_CALLDATA_KWARG = 1 << 6;
 // static const int YP_CALLDATA_KW_SPLAT = 1 << 7;
 // static const int YP_CALLDATA_TAILCALL = 1 << 8;
 // static const int YP_CALLDATA_SUPER = 1 << 9;
 // static const int YP_CALLDATA_ZSUPER = 1 << 10;
 // static const int YP_CALLDATA_OPT_SEND = 1 << 11;
 // static const int YP_CALLDATA_KW_SPLAT_MUT = 1 << 12;
 // Build the calldata hash for a call instruction: { mid:, flag:, orig_argc: }
 // holding the method name as a symbol, the YP_CALLDATA_* flags, and the
 // number of arguments.
 static VALUE
 yp_calldata_new(ID mid, int flag, size_t orig_argc) {
    VALUE mid_key = ID2SYM(rb_intern("mid"));
    VALUE flag_key = ID2SYM(rb_intern("flag"));
    VALUE argc_key = ID2SYM(rb_intern("orig_argc"));

    VALUE result = rb_hash_new();
    rb_hash_aset(result, mid_key, ID2SYM(mid));
    rb_hash_aset(result, flag_key, INT2FIX(flag));
    rb_hash_aset(result, argc_key, INT2FIX(orig_argc));

    return result;
 }
 // Hand out the next inline storage index as a Fixnum and advance the
 // compiler's counter.
 static inline VALUE
 yp_inline_storage_new(yp_iseq_compiler_t *compiler) {
    size_t current = compiler->inline_storage_index;
    compiler->inline_storage_index = current + 1;

    return INT2FIX(current);
 }
 /******************************************************************************/
 /* Push instructions onto a compiler                                          */
 /******************************************************************************/
 // Append one instruction to the compiler and return it.
 //
 // The variadic arguments are `size` VALUEs: the opcode symbol followed by its
 // operands. They are collected into a Ruby array which is pushed onto the
 // compiler's instruction list, alongside a placeholder node id of -1.
 // stack_change is added to the tracked stack depth and the high-water mark is
 // updated when the depth grows past it.
 static VALUE
 push_insn(yp_iseq_compiler_t *compiler, int stack_change, size_t size, ...) {
    va_list operands;
    va_start(operands, size);

    VALUE insn = rb_ary_new_capa(size);
    size_t remaining = size;
    while (remaining > 0) {
        rb_ary_push(insn, va_arg(operands, VALUE));
        remaining--;
    }

    va_end(operands);

    // Track the current and maximum stack depth.
    int depth = compiler->stack_size + stack_change;
    compiler->stack_size = depth;
    if (compiler->stack_max < depth) {
        compiler->stack_max = depth;
    }

    compiler->size += size;
    rb_ary_push(compiler->insns, insn);
    rb_ary_push(compiler->node_ids, INT2FIX(-1));

    return insn;
 }
 // Append a label to the instruction list and return it. The label is a symbol
 // named "label_<n>" where <n> is the compiler's current size counter.
 static VALUE
 push_label(yp_iseq_compiler_t *compiler) {
    VALUE name = rb_sprintf("label_%zu", compiler->size);
    VALUE label = ID2SYM(rb_intern_str(name));

    rb_ary_push(compiler->insns, label);
    return label;
 }
 // Append the symbol for the given trace event to the instruction list. Values
 // outside of yp_ruby_event_t append nothing.
 static void
 push_ruby_event(yp_iseq_compiler_t *compiler, yp_ruby_event_t event) {
    const char *name;

    switch (event) {
        case YP_RUBY_EVENT_B_CALL:
            name = "RUBY_EVENT_B_CALL";
            break;
        case YP_RUBY_EVENT_B_RETURN:
            name = "RUBY_EVENT_B_RETURN";
            break;
        default:
            return;
    }

    rb_ary_push(compiler->insns, ID2SYM(rb_intern(name)));
 }
 // Append an `anytostring` instruction (no operands). Recorded stack effect:
 // two slots popped, one pushed.
 static inline VALUE
 push_anytostring(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("anytostring"));
    return push_insn(compiler, -1, 1, opcode);
 }
 // Append a `branchif` instruction targeting the given label. Recorded stack
 // effect: one slot popped, none pushed.
 static inline VALUE
 push_branchif(yp_iseq_compiler_t *compiler, VALUE label) {
    VALUE opcode = ID2SYM(rb_intern("branchif"));
    return push_insn(compiler, -1, 2, opcode, label);
 }
 // Append a `branchunless` instruction targeting the given label. Recorded
 // stack effect: one slot popped, none pushed.
 static inline VALUE
 push_branchunless(yp_iseq_compiler_t *compiler, VALUE label) {
    VALUE opcode = ID2SYM(rb_intern("branchunless"));
    return push_insn(compiler, -1, 2, opcode, label);
 }
 // Append a `concatstrings` instruction joining `count` values. Recorded stack
 // effect: `count` slots popped, one pushed.
 static inline VALUE
 push_concatstrings(yp_iseq_compiler_t *compiler, int count) {
    VALUE opcode = ID2SYM(rb_intern("concatstrings"));
    return push_insn(compiler, 1 - count, 2, opcode, INT2FIX(count));
 }
 // Append a `dup` instruction (no operands). Recorded stack effect: one slot
 // popped, two pushed.
 static inline VALUE
 push_dup(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("dup"));
    return push_insn(compiler, 1, 1, opcode);
 }
 // Append a `getclassvariable` instruction for the given name and inline
 // storage slot. Recorded stack effect: one slot pushed.
 static inline VALUE
 push_getclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
    VALUE opcode = ID2SYM(rb_intern("getclassvariable"));
    return push_insn(compiler, 1, 3, opcode, name, inline_storage);
 }
 // Append a `getconstant` instruction for the given name. Recorded stack
 // effect: two slots popped, one pushed.
 static inline VALUE
 push_getconstant(yp_iseq_compiler_t *compiler, VALUE name) {
    VALUE opcode = ID2SYM(rb_intern("getconstant"));
    return push_insn(compiler, -1, 2, opcode, name);
 }
 // Append a `getglobal` instruction for the given name. Recorded stack effect:
 // one slot pushed.
 static inline VALUE
 push_getglobal(yp_iseq_compiler_t *compiler, VALUE name) {
    VALUE opcode = ID2SYM(rb_intern("getglobal"));
    return push_insn(compiler, 1, 2, opcode, name);
 }
 // Append a `getinstancevariable` instruction for the given name and inline
 // storage slot. Recorded stack effect: one slot pushed.
 static inline VALUE
 push_getinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
    VALUE opcode = ID2SYM(rb_intern("getinstancevariable"));
    return push_insn(compiler, 1, 3, opcode, name, inline_storage);
 }
 // Append a `getlocal` instruction for the local at the given index and scope
 // depth. Recorded stack effect: one slot pushed.
 static inline VALUE
 push_getlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
    VALUE opcode = ID2SYM(rb_intern("getlocal"));
    return push_insn(compiler, 1, 3, opcode, index, depth);
 }
 // Append a `leave` instruction (no operands). Recorded stack effect: one slot
 // popped.
 static inline VALUE
 push_leave(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("leave"));
    return push_insn(compiler, -1, 1, opcode);
 }
 // Append a `newarray` instruction building an array from `count` values.
 // Recorded stack effect: `count` slots popped, one pushed.
 static inline VALUE
 push_newarray(yp_iseq_compiler_t *compiler, int count) {
    VALUE opcode = ID2SYM(rb_intern("newarray"));
    return push_insn(compiler, 1 - count, 2, opcode, INT2FIX(count));
 }
 // Append a `newhash` instruction building a hash from `count` values (keys
 // and values interleaved). Recorded stack effect: `count` slots popped, one
 // pushed.
 static inline VALUE
 push_newhash(yp_iseq_compiler_t *compiler, int count) {
    VALUE opcode = ID2SYM(rb_intern("newhash"));
    return push_insn(compiler, 1 - count, 2, opcode, INT2FIX(count));
 }
 // Append a `newrange` instruction with the given flag operand. Recorded stack
 // effect: two slots popped, one pushed.
 static inline VALUE
 push_newrange(yp_iseq_compiler_t *compiler, VALUE flag) {
    VALUE opcode = ID2SYM(rb_intern("newrange"));
    return push_insn(compiler, -1, 2, opcode, flag);
 }
 // Append a `nop` instruction (no operands).
 //
 // `nop` neither pops nor pushes anything, so the recorded stack effect is 0.
 // The previous value of `-2 + 1` made the tracked stack depth drift by one
 // every time a nop was emitted.
 static inline VALUE
 push_nop(yp_iseq_compiler_t *compiler) {
    return push_insn(compiler, -0 + 0, 1, ID2SYM(rb_intern("nop")));
 }
 // Append an `objtostring` instruction with the given calldata. Recorded stack
 // effect: one slot popped, one pushed.
 static inline VALUE
 push_objtostring(yp_iseq_compiler_t *compiler, VALUE calldata) {
    VALUE opcode = ID2SYM(rb_intern("objtostring"));
    return push_insn(compiler, 0, 2, opcode, calldata);
 }
 // Append a `pop` instruction (no operands). Recorded stack effect: one slot
 // popped.
 static inline VALUE
 push_pop(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("pop"));
    return push_insn(compiler, -1, 1, opcode);
 }
 // Append a `putnil` instruction (no operands). Recorded stack effect: one
 // slot pushed.
 static inline VALUE
 push_putnil(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("putnil"));
    return push_insn(compiler, 1, 1, opcode);
 }
 // Append a `putobject` instruction that pushes the given value. Recorded
 // stack effect: one slot pushed.
 static inline VALUE
 push_putobject(yp_iseq_compiler_t *compiler, VALUE value) {
    VALUE opcode = ID2SYM(rb_intern("putobject"));
    return push_insn(compiler, 1, 2, opcode, value);
 }
 // Append a `putself` instruction (no operands). Recorded stack effect: one
 // slot pushed.
 static inline VALUE
 push_putself(yp_iseq_compiler_t *compiler) {
    VALUE opcode = ID2SYM(rb_intern("putself"));
    return push_insn(compiler, 1, 1, opcode);
 }
 // Append a `setlocal` instruction for the local at the given index and scope
 // depth. Recorded stack effect: one slot popped.
 static inline VALUE
 push_setlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
    VALUE opcode = ID2SYM(rb_intern("setlocal"));
    return push_insn(compiler, -1, 3, opcode, index, depth);
 }
 // Operand values accepted by push_putspecialobject, pre-boxed as Fixnums.
 // NOTE(review): the numbering presumably follows CRuby's
 // vm_special_object_type enum; confirm before enabling CONST_BASE.
 static const VALUE YP_SPECIALOBJECT_VMCORE = INT2FIX(1);
 static const VALUE YP_SPECIALOBJECT_CBASE = INT2FIX(2);
 // static const VALUE YP_SPECIALOBJECT_CONST_BASE = INT2FIX(3);
 // Append a `putspecialobject` instruction selecting the given special object
 // (one of the YP_SPECIALOBJECT_* values). Recorded stack effect: one slot
 // pushed.
 static inline VALUE
 push_putspecialobject(yp_iseq_compiler_t *compiler, VALUE object) {
    VALUE opcode = ID2SYM(rb_intern("putspecialobject"));
    return push_insn(compiler, 1, 2, opcode, object);
 }
 // Append a `putstring` instruction that pushes the given string. Recorded
 // stack effect: one slot pushed.
 static inline VALUE
 push_putstring(yp_iseq_compiler_t *compiler, VALUE string) {
    VALUE opcode = ID2SYM(rb_intern("putstring"));
    return push_insn(compiler, 1, 2, opcode, string);
 }
 // Append a `send` instruction with the given calldata and block ISEQ (Qnil
 // when there is no block). The stack effect depends on the call, so the
 // caller supplies it through stack_change.
 static inline VALUE
 push_send(yp_iseq_compiler_t *compiler, int stack_change, VALUE calldata, VALUE block_iseq) {
    VALUE opcode = ID2SYM(rb_intern("send"));
    return push_insn(compiler, stack_change, 3, opcode, calldata, block_iseq);
 }
 // Append a `setclassvariable` instruction for the given name and inline
 // storage slot. Recorded stack effect: one slot popped.
 static inline VALUE
 push_setclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
    VALUE opcode = ID2SYM(rb_intern("setclassvariable"));
    return push_insn(compiler, -1, 3, opcode, name, inline_storage);
 }
 // Append a `setglobal` instruction for the given name. Recorded stack effect:
 // one slot popped.
 static inline VALUE
 push_setglobal(yp_iseq_compiler_t *compiler, VALUE name) {
    VALUE opcode = ID2SYM(rb_intern("setglobal"));
    return push_insn(compiler, -1, 2, opcode, name);
 }
 // Append a `setinstancevariable` instruction for the given name and inline
 // storage slot. Recorded stack effect: one slot popped.
 static inline VALUE
 push_setinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
    VALUE opcode = ID2SYM(rb_intern("setinstancevariable"));
    return push_insn(compiler, -1, 3, opcode, name, inline_storage);
 }
 /******************************************************************************/
 /* Compile an AST node using the given compiler                               */
 /******************************************************************************/
 // Compile the given AST node (and, recursively, its children) by appending
 // instructions to the given compiler. Every expression node is expected to
 // leave exactly one value on the stack.
 //
 // Node types that are not handled yet raise NotImplementedError, as do
 // variable writes that have no value.
 static void
 yp_compile_node(yp_iseq_compiler_t *compiler, yp_node_t *base_node) {
    switch (base_node->type) {
        case YP_NODE_ALIAS_NODE: {
            yp_alias_node_t *node = (yp_alias_node_t *) base_node;
            push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
            push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
            yp_compile_node(compiler, node->new_name);
            yp_compile_node(compiler, node->old_name);
            push_send(compiler, -3, yp_calldata_new(rb_intern("core#set_method_alias"), YP_CALLDATA_ARGS_SIMPLE, 3), Qnil);
            return;
        }
        case YP_NODE_AND_NODE: {
            yp_and_node_t *node = (yp_and_node_t *) base_node;
            yp_compile_node(compiler, node->left);
            push_dup(compiler);
            // The jump target is not known yet, so emit the branch with a
            // placeholder and patch it once the label has been created.
            VALUE branchunless = push_branchunless(compiler, Qnil);
            push_pop(compiler);
            yp_compile_node(compiler, node->right);
            VALUE label = push_label(compiler);
            rb_ary_store(branchunless, 1, label);
            return;
        }
        case YP_NODE_ARGUMENTS_NODE: {
            yp_arguments_node_t *node = (yp_arguments_node_t *) base_node;
            yp_node_list_t node_list = node->arguments;
            for (size_t index = 0; index < node_list.size; index++) {
                yp_compile_node(compiler, node_list.nodes[index]);
            }
            return;
        }
        case YP_NODE_ARRAY_NODE: {
            yp_array_node_t *node = (yp_array_node_t *) base_node;
            yp_node_list_t elements = node->elements;
            for (size_t index = 0; index < elements.size; index++) {
                yp_compile_node(compiler, elements.nodes[index]);
            }
            push_newarray(compiler, sizet2int(elements.size));
            return;
        }
        case YP_NODE_ASSOC_NODE: {
            yp_assoc_node_t *node = (yp_assoc_node_t *) base_node;
            yp_compile_node(compiler, node->key);
            yp_compile_node(compiler, node->value);
            return;
        }
        case YP_NODE_BLOCK_NODE: {
            yp_block_node_t *node = (yp_block_node_t *) base_node;
            VALUE optional_labels = rb_ary_new();
            if (node->parameters &&
                    node->parameters->parameters &&
                    node->parameters->parameters->optionals.size > 0) {
                yp_node_list_t *optionals = &node->parameters->parameters->optionals;
                // Go through sizet2int so an out-of-range count raises instead
                // of being silently truncated into the int counter.
                compiler->arg_size += sizet2int(optionals->size);
                // One label per optional parameter, each followed by the code
                // that assigns its default value...
                for (size_t i = 0; i < optionals->size; i++) {
                    VALUE label = push_label(compiler);
                    rb_ary_push(optional_labels, label);
                    yp_compile_node(compiler, optionals->nodes[i]);
                }
                // ...plus a final label for when every optional was supplied.
                VALUE label = push_label(compiler);
                rb_ary_push(optional_labels, label);
                rb_hash_aset(compiler->optionals, ID2SYM(rb_intern("opt")), optional_labels);
                push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
                push_nop(compiler);
            } else {
                push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
            }
            if (node->statements) {
                yp_compile_node(compiler, node->statements);
            } else {
                push_putnil(compiler);
            }
            push_ruby_event(compiler, YP_RUBY_EVENT_B_RETURN);
            push_leave(compiler);
            return;
        }
        case YP_NODE_CALL_NODE: {
            yp_call_node_t *node = (yp_call_node_t *) base_node;
            ID mid = parse_location_symbol(&node->message_loc);
            size_t orig_argc = 0;
            if (node->receiver == NULL) {
                push_putself(compiler);
            } else {
                yp_compile_node(compiler, node->receiver);
            }
            if (node->arguments != NULL) {
                yp_arguments_node_t *arguments = node->arguments;
                yp_compile_node(compiler, (yp_node_t *) arguments);
                orig_argc = arguments->arguments.size;
            }
            VALUE block_iseq = Qnil;
            if (node->block != NULL) {
                // Blocks are compiled into their own instruction sequence
                // whose parent is the current compiler.
                yp_iseq_compiler_t block_compiler;
                yp_iseq_compiler_init(
                    &block_compiler,
                    compiler,
                    &node->block->locals,
                    "block in <compiled>",
                    YP_ISEQ_TYPE_BLOCK
                );
                yp_compile_node(&block_compiler, (yp_node_t *) node->block);
                block_iseq = yp_iseq_new(&block_compiler);
            }
            // A call without a block is flagged as having simple arguments.
            int flags = (block_iseq == Qnil) ? YP_CALLDATA_ARGS_SIMPLE : 0;
            if (node->receiver == NULL) {
                flags |= YP_CALLDATA_FCALL;
                if (block_iseq == Qnil && node->arguments == NULL) {
                    flags |= YP_CALLDATA_VCALL;
                }
            }
            // send pops the receiver and the arguments and pushes the result.
            push_send(compiler, -sizet2int(orig_argc), yp_calldata_new(mid, flags, orig_argc), block_iseq);
            return;
        }
        case YP_NODE_CLASS_VARIABLE_READ_NODE: {
            yp_class_variable_read_node_t *node = (yp_class_variable_read_node_t *) base_node;
            push_getclassvariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node)), yp_inline_storage_new(compiler));
            return;
        }
        case YP_NODE_CLASS_VARIABLE_WRITE_NODE: {
            yp_class_variable_write_node_t *node = (yp_class_variable_write_node_t *) base_node;
            if (node->value == NULL) {
                rb_raise(rb_eNotImpError, "class variable write without value not implemented");
            }
            yp_compile_node(compiler, node->value);
            push_dup(compiler);
            push_setclassvariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
            return;
        }
        case YP_NODE_CONSTANT_PATH_NODE: {
            yp_constant_path_node_t *node = (yp_constant_path_node_t *) base_node;
            if (node->parent == NULL) {
                // A path with no parent (presumably the `::Foo` form) is
                // looked up directly on Object. Recursing into a NULL parent
                // would dereference it.
                // NOTE(review): the Object/true operands follow CRuby's
                // handling of top-level constant paths; confirm against
                // compile.c.
                push_putobject(compiler, rb_cObject);
                push_putobject(compiler, Qtrue);
            } else {
                yp_compile_node(compiler, node->parent);
                push_putobject(compiler, Qfalse);
            }
            push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node->child)));
            return;
        }
        case YP_NODE_CONSTANT_READ_NODE:
            push_putnil(compiler);
            push_putobject(compiler, Qtrue);
            push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)));
            return;
        case YP_NODE_EMBEDDED_STATEMENTS_NODE: {
            yp_embedded_statements_node_t *node = (yp_embedded_statements_node_t *) base_node;
            yp_compile_node(compiler, (yp_node_t *) node->statements);
            return;
        }
        case YP_NODE_FALSE_NODE:
            push_putobject(compiler, Qfalse);
            return;
        case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
            push_getglobal(compiler, ID2SYM(parse_location_symbol(&base_node->location)));
            return;
        case YP_NODE_GLOBAL_VARIABLE_WRITE_NODE: {
            yp_global_variable_write_node_t *node = (yp_global_variable_write_node_t *) base_node;
            if (node->value == NULL) {
                rb_raise(rb_eNotImpError, "global variable write without value not implemented");
            }
            yp_compile_node(compiler, node->value);
            push_dup(compiler);
            push_setglobal(compiler, ID2SYM(parse_location_symbol(&node->name_loc)));
            return;
        }
        case YP_NODE_HASH_NODE: {
            yp_hash_node_t *node = (yp_hash_node_t *) base_node;
            yp_node_list_t elements = node->elements;
            for (size_t index = 0; index < elements.size; index++) {
                yp_compile_node(compiler, elements.nodes[index]);
            }
            // Each element contributes a key and a value.
            push_newhash(compiler, sizet2int(elements.size * 2));
            return;
        }
        case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
            push_getinstancevariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)), yp_inline_storage_new(compiler));
            return;
        case YP_NODE_INSTANCE_VARIABLE_WRITE_NODE: {
            yp_instance_variable_write_node_t *node = (yp_instance_variable_write_node_t *) base_node;
            if (node->value == NULL) {
                rb_raise(rb_eNotImpError, "instance variable write without value not implemented");
            }
            yp_compile_node(compiler, node->value);
            push_dup(compiler);
            push_setinstancevariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
            return;
        }
        case YP_NODE_INTEGER_NODE:
            push_putobject(compiler, parse_number(base_node->location.start, base_node->location.end));
            return;
        case YP_NODE_INTERPOLATED_STRING_NODE: {
            yp_interpolated_string_node_t *node = (yp_interpolated_string_node_t *) base_node;
            for (size_t index = 0; index < node->parts.size; index++) {
                yp_node_t *part = node->parts.nodes[index];
                switch (part->type) {
                    case YP_NODE_STRING_NODE: {
                        yp_string_node_t *string_node = (yp_string_node_t *) part;
                        push_putobject(compiler, parse_string(&string_node->unescaped));
                        break;
                    }
                    default:
                        // Anything that is not a plain string is converted
                        // with to_s before being concatenated.
                        yp_compile_node(compiler, part);
                        push_dup(compiler);
                        push_objtostring(compiler, yp_calldata_new(rb_intern("to_s"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 0));
                        push_anytostring(compiler);
                        break;
                }
            }
            push_concatstrings(compiler, sizet2int(node->parts.size));
            return;
        }
        case YP_NODE_KEYWORD_HASH_NODE: {
            yp_keyword_hash_node_t *node = (yp_keyword_hash_node_t *) base_node;
            yp_node_list_t elements = node->elements;
            for (size_t index = 0; index < elements.size; index++) {
                yp_compile_node(compiler, elements.nodes[index]);
            }
            // Each element contributes a key and a value.
            push_newhash(compiler, sizet2int(elements.size * 2));
            return;
        }
        case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
            yp_local_variable_read_node_t *node = (yp_local_variable_read_node_t *) base_node;
            int index = local_index(compiler, node->constant_id, node->depth);
            push_getlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
            return;
        }
        case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
            yp_local_variable_write_node_t *node = (yp_local_variable_write_node_t *) base_node;
            if (node->value == NULL) {
                rb_raise(rb_eNotImpError, "local variable write without value not implemented");
            }
            int index = local_index(compiler, node->constant_id, node->depth);
            yp_compile_node(compiler, node->value);
            push_dup(compiler);
            push_setlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
            return;
        }
        case YP_NODE_NIL_NODE:
            push_putnil(compiler);
            return;
        case YP_NODE_OR_NODE: {
            yp_or_node_t *node = (yp_or_node_t *) base_node;
            yp_compile_node(compiler, node->left);
            push_dup(compiler);
            // Emit the branch with a placeholder target and patch it once the
            // label has been created.
            VALUE branchif = push_branchif(compiler, Qnil);
            push_pop(compiler);
            yp_compile_node(compiler, node->right);
            VALUE label = push_label(compiler);
            rb_ary_store(branchif, 1, label);
            return;
        }
        case YP_NODE_PARENTHESES_NODE: {
            yp_parentheses_node_t *node = (yp_parentheses_node_t *) base_node;
            if (node->statements == NULL) {
                push_putnil(compiler);
            } else {
                yp_compile_node(compiler, node->statements);
            }
            return;
        }
        case YP_NODE_PROGRAM_NODE: {
            yp_program_node_t *node = (yp_program_node_t *) base_node;
            if (node->statements->body.size == 0) {
                push_putnil(compiler);
            } else {
                yp_compile_node(compiler, (yp_node_t *) node->statements);
            }
            push_leave(compiler);
            return;
        }
        case YP_NODE_RANGE_NODE: {
            yp_range_node_t *node = (yp_range_node_t *) base_node;
            if (node->left == NULL) {
                push_putnil(compiler);
            } else {
                yp_compile_node(compiler, node->left);
            }
            if (node->right == NULL) {
                push_putnil(compiler);
            } else {
                yp_compile_node(compiler, node->right);
            }
            // The flag is 1 when the operator is three bytes long (`...`) and
            // 0 otherwise.
            push_newrange(compiler, INT2FIX((node->operator_loc.end - node->operator_loc.start) == 3));
            return;
        }
        case YP_NODE_SELF_NODE:
            push_putself(compiler);
            return;
        case YP_NODE_STATEMENTS_NODE: {
            yp_statements_node_t *node = (yp_statements_node_t *) base_node;
            yp_node_list_t node_list = node->body;
            for (size_t index = 0; index < node_list.size; index++) {
                yp_compile_node(compiler, node_list.nodes[index]);
                // Only the value of the last statement is kept on the stack.
                if (index < node_list.size - 1) push_pop(compiler);
            }
            return;
        }
        case YP_NODE_STRING_NODE: {
            yp_string_node_t *node = (yp_string_node_t *) base_node;
            push_putstring(compiler, parse_string(&node->unescaped));
            return;
        }
        case YP_NODE_SYMBOL_NODE: {
            yp_symbol_node_t *node = (yp_symbol_node_t *) base_node;
            push_putobject(compiler, ID2SYM(parse_string_symbol(&node->unescaped)));
            return;
        }
        case YP_NODE_TRUE_NODE:
            push_putobject(compiler, Qtrue);
            return;
        case YP_NODE_UNDEF_NODE: {
            yp_undef_node_t *node = (yp_undef_node_t *) base_node;
            for (size_t index = 0; index < node->names.size; index++) {
                push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
                push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
                yp_compile_node(compiler, node->names.nodes[index]);
                push_send(compiler, -2, yp_calldata_new(rb_intern("core#undef_method"), YP_CALLDATA_ARGS_SIMPLE, 2), Qnil);
                // Only the result of the last undef is kept on the stack.
                if (index < node->names.size - 1) push_pop(compiler);
            }
            return;
        }
        case YP_NODE_X_STRING_NODE: {
            yp_x_string_node_t *node = (yp_x_string_node_t *) base_node;
            push_putself(compiler);
            push_putobject(compiler, parse_string(&node->unescaped));
            push_send(compiler, -1, yp_calldata_new(rb_intern("`"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 1), Qnil);
            return;
        }
        case YP_NODE_OPTIONAL_PARAMETER_NODE: {
            yp_optional_parameter_node_t *node = (yp_optional_parameter_node_t *) base_node;
            // Optional parameters are always looked up in the current scope.
            int depth = 0;
            int index = local_index(compiler, node->constant_id, depth);
            yp_compile_node(compiler, node->value);
            push_setlocal(compiler, INT2FIX(index), INT2FIX(depth));
            return;
        }
        default:
            rb_raise(rb_eNotImpError, "node type %d not implemented", base_node->type);
            return;
    }
 }
 // Entry point of the compiler: turn the given program node into the Ruby
 // array built by yp_iseq_new. The node must be a program node (asserted).
 VALUE
 yp_compile(yp_node_t *node) {
    assert(node->type == YP_NODE_PROGRAM_NODE);
    yp_program_node_t *program = (yp_program_node_t *) node;

    // The top-level compiler has no parent and uses the program's locals.
    yp_iseq_compiler_t compiler;
    yp_iseq_compiler_init(&compiler, NULL, &program->locals, "<compiled>", YP_ISEQ_TYPE_TOP);

    yp_compile_node(&compiler, node);
    return yp_iseq_new(&compiler);
 }
--- a/yarp/config.h
+++ b/yarp/config.h
@ -0,0 +1 @@
 #include "ruby/config.h"
--- a/yarp/defines.h
+++ b/yarp/defines.h
@ -1,8 +1,20 @@
 #ifndef YARP_DEFINES_H
 #define YARP_DEFINES_H
 // This file should be included first by any *.h or *.c in YARP
 #include "yarp/config.h"
 #include <ctype.h>
 #include <stdarg.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <string.h>
 // YP_EXPORTED_FUNCTION
-#if defined(_WIN32)
+#if defined(YP_STATIC)
 #   define YP_EXPORTED_FUNCTION
 #elif defined(_WIN32)
 #   define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
 #else
 #   ifndef YP_EXPORTED_FUNCTION
@ -16,9 +28,9 @@
 // YP_ATTRIBUTE_UNUSED
 #if defined(__GNUC__)
-# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
+#   define YP_ATTRIBUTE_UNUSED __attribute__((unused))
 #else
-# define YP_ATTRIBUTE_UNUSED
+#   define YP_ATTRIBUTE_UNUSED
 #endif
 // inline
@ -26,4 +38,13 @@
 #   define inline __inline
 #endif
 int yp_strncasecmp(const char *string1, const char *string2, size_t length);
 int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
 #if defined(HAVE_SNPRINTF)
    // We use snprintf if it's available
 #   define yp_snprintf snprintf
 #endif
 #endif
--- a/yarp/diagnostic.h
+++ b/yarp/diagnostic.h
@ -2,12 +2,11 @@
 #define YARP_DIAGNOSTIC_H
 #include "yarp/defines.h"
 #include "yarp/util/yp_list.h"
 #include <stdbool.h>
 #include <stdlib.h>
 #include "yarp/util/yp_list.h"
 // This struct represents a diagnostic found during parsing.
 typedef struct {
    yp_list_node_t node;
--- a/yarp/enc/yp_ascii.c
+++ b/yarp/enc/yp_ascii.c
@ -51,7 +51,8 @@ yp_encoding_t yp_encoding_ascii = {
    .char_width = yp_encoding_ascii_char_width,
    .alnum_char = yp_encoding_ascii_alnum_char,
    .alpha_char = yp_encoding_ascii_alpha_char,
-    .isupper_char = yp_encoding_ascii_isupper_char
+    .isupper_char = yp_encoding_ascii_isupper_char,
    .multibyte = false
 };
 yp_encoding_t yp_encoding_ascii_8bit = {
@ -60,4 +61,5 @@ yp_encoding_t yp_encoding_ascii_8bit = {
    .alnum_char = yp_encoding_ascii_alnum_char,
    .alpha_char = yp_encoding_ascii_alpha_char,
    .isupper_char = yp_encoding_ascii_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_big5.c
+++ b/yarp/enc/yp_big5.c
@ -74,5 +74,6 @@ yp_encoding_t yp_encoding_big5 = {
    .char_width = yp_encoding_big5_char_width,
    .alnum_char = yp_encoding_big5_alnum_char,
    .alpha_char = yp_encoding_big5_alpha_char,
-    .isupper_char = yp_encoding_big5_isupper_char
+    .isupper_char = yp_encoding_big5_isupper_char,
    .multibyte = true
 };
--- a/yarp/enc/yp_encoding.h
+++ b/yarp/enc/yp_encoding.h
@ -12,11 +12,28 @@
 // Each callback should return the number of bytes, or 0 if the next bytes are
 // invalid for the encoding and type.
 typedef struct {
-    const char *name;
+    // Return the number of bytes that the next character takes if it is valid
    // in the encoding.
    size_t (*char_width)(const char *c);
    // Return the number of bytes that the next character takes if it is valid
    // in the encoding and is alphabetical.
    size_t (*alpha_char)(const char *c);
    // Return the number of bytes that the next character takes if it is valid
    // in the encoding and is alphanumeric.
    size_t (*alnum_char)(const char *c);
    // Return true if the next character is valid in the encoding and is an
    // uppercase character.
    bool (*isupper_char)(const char *c);
    // The name of the encoding. This should correspond to a value that can be
    // passed to Encoding.find in Ruby.
    const char *name;
    // True if the encoding is a multibyte encoding. Unlike the members above,
    // this is a plain flag and not a callback.
    bool multibyte;
 } yp_encoding_t;
 // These bits define the location of each bit of metadata within the various
--- a/yarp/enc/yp_euc_jp.c
+++ b/yarp/enc/yp_euc_jp.c
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_euc_jp = {
    .char_width = yp_encoding_euc_jp_char_width,
    .alnum_char = yp_encoding_euc_jp_alnum_char,
    .alpha_char = yp_encoding_euc_jp_alpha_char,
-    .isupper_char = yp_encoding_euc_jp_isupper_char
+    .isupper_char = yp_encoding_euc_jp_isupper_char,
    .multibyte = true
 };
--- a/yarp/enc/yp_gbk.c
+++ b/yarp/enc/yp_gbk.c
@ -80,5 +80,6 @@ yp_encoding_t yp_encoding_gbk = {
    .char_width = yp_encoding_gbk_char_width,
    .alnum_char = yp_encoding_gbk_alnum_char,
    .alpha_char = yp_encoding_gbk_alpha_char,
-    .isupper_char = yp_encoding_gbk_isupper_char
+    .isupper_char = yp_encoding_gbk_isupper_char,
    .multibyte = true
 };
--- a/yarp/enc/yp_iso_8859_1.c
+++ b/yarp/enc/yp_iso_8859_1.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_1 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_1_alnum_char,
    .alpha_char = yp_encoding_iso_8859_1_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_1_isupper_char
+    .isupper_char = yp_encoding_iso_8859_1_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_10.c
+++ b/yarp/enc/yp_iso_8859_10.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_10 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_10_alnum_char,
    .alpha_char = yp_encoding_iso_8859_10_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_10_isupper_char
+    .isupper_char = yp_encoding_iso_8859_10_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_11.c
+++ b/yarp/enc/yp_iso_8859_11.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_11 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_11_alnum_char,
    .alpha_char = yp_encoding_iso_8859_11_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_11_isupper_char
+    .isupper_char = yp_encoding_iso_8859_11_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_13.c
+++ b/yarp/enc/yp_iso_8859_13.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_13 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_13_alnum_char,
    .alpha_char = yp_encoding_iso_8859_13_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_13_isupper_char
+    .isupper_char = yp_encoding_iso_8859_13_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_14.c
+++ b/yarp/enc/yp_iso_8859_14.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_14 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_14_alnum_char,
    .alpha_char = yp_encoding_iso_8859_14_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_14_isupper_char
+    .isupper_char = yp_encoding_iso_8859_14_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_15.c
+++ b/yarp/enc/yp_iso_8859_15.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_15 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_15_alnum_char,
    .alpha_char = yp_encoding_iso_8859_15_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_15_isupper_char
+    .isupper_char = yp_encoding_iso_8859_15_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_16.c
+++ b/yarp/enc/yp_iso_8859_16.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_16 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_16_alnum_char,
    .alpha_char = yp_encoding_iso_8859_16_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_16_isupper_char
+    .isupper_char = yp_encoding_iso_8859_16_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_2.c
+++ b/yarp/enc/yp_iso_8859_2.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_2 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_2_alnum_char,
    .alpha_char = yp_encoding_iso_8859_2_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_2_isupper_char
+    .isupper_char = yp_encoding_iso_8859_2_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_3.c
+++ b/yarp/enc/yp_iso_8859_3.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_3 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_3_alnum_char,
    .alpha_char = yp_encoding_iso_8859_3_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_3_isupper_char
+    .isupper_char = yp_encoding_iso_8859_3_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_4.c
+++ b/yarp/enc/yp_iso_8859_4.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_4 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_4_alnum_char,
    .alpha_char = yp_encoding_iso_8859_4_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_4_isupper_char
+    .isupper_char = yp_encoding_iso_8859_4_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_5.c
+++ b/yarp/enc/yp_iso_8859_5.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_5 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_5_alnum_char,
    .alpha_char = yp_encoding_iso_8859_5_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_5_isupper_char
+    .isupper_char = yp_encoding_iso_8859_5_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_6.c
+++ b/yarp/enc/yp_iso_8859_6.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_6 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_6_alnum_char,
    .alpha_char = yp_encoding_iso_8859_6_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_6_isupper_char
+    .isupper_char = yp_encoding_iso_8859_6_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_7.c
+++ b/yarp/enc/yp_iso_8859_7.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_7 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_7_alnum_char,
    .alpha_char = yp_encoding_iso_8859_7_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_7_isupper_char
+    .isupper_char = yp_encoding_iso_8859_7_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_8.c
+++ b/yarp/enc/yp_iso_8859_8.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_8 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_8_alnum_char,
    .alpha_char = yp_encoding_iso_8859_8_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_8_isupper_char
+    .isupper_char = yp_encoding_iso_8859_8_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_9.c
+++ b/yarp/enc/yp_iso_8859_9.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_9 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_9_alnum_char,
    .alpha_char = yp_encoding_iso_8859_9_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_9_isupper_char
+    .isupper_char = yp_encoding_iso_8859_9_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_koi8_r.c
+++ b/yarp/enc/yp_koi8_r.c
@ -51,5 +51,6 @@ yp_encoding_t yp_encoding_koi8_r = {
    .char_width = yp_encoding_koi8_r_char_width,
    .alnum_char = yp_encoding_koi8_r_alnum_char,
    .alpha_char = yp_encoding_koi8_r_alpha_char,
-    .isupper_char = yp_encoding_koi8_r_isupper_char
+    .isupper_char = yp_encoding_koi8_r_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_shift_jis.c
+++ b/yarp/enc/yp_shift_jis.c
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_shift_jis = {
    .char_width = yp_encoding_shift_jis_char_width,
    .alnum_char = yp_encoding_shift_jis_alnum_char,
    .alpha_char = yp_encoding_shift_jis_alpha_char,
-    .isupper_char = yp_encoding_shift_jis_isupper_char
+    .isupper_char = yp_encoding_shift_jis_isupper_char,
    .multibyte = true
 };
--- a/yarp/enc/yp_unicode.c
+++ b/yarp/enc/yp_unicode.c
@ -2230,7 +2230,7 @@ utf_8_codepoint(const unsigned char *c, size_t *width) {
        codepoint = (state != 0) ?
            (byte & 0x3fu) | (codepoint << 6) :
-            (0xff >> type) & (byte);
+            (0xffu >> type) & (byte);
        state = utf_8_dfa[256 + (state * 16) + type];
        if (!state) {
@ -2312,5 +2312,6 @@ yp_encoding_t yp_encoding_utf_8 = {
    .char_width = yp_encoding_utf_8_char_width,
    .alnum_char = yp_encoding_utf_8_alnum_char,
    .alpha_char = yp_encoding_utf_8_alpha_char,
-    .isupper_char = yp_encoding_utf_8_isupper_char
+    .isupper_char = yp_encoding_utf_8_isupper_char,
    .multibyte = true
 };
--- a/yarp/enc/yp_windows_1251.c
+++ b/yarp/enc/yp_windows_1251.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1251 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_windows_1251_alnum_char,
    .alpha_char = yp_encoding_windows_1251_alpha_char,
-    .isupper_char = yp_encoding_windows_1251_isupper_char
+    .isupper_char = yp_encoding_windows_1251_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_windows_1252.c
+++ b/yarp/enc/yp_windows_1252.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1252 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_windows_1252_alnum_char,
    .alpha_char = yp_encoding_windows_1252_alpha_char,
-    .isupper_char = yp_encoding_windows_1252_isupper_char
+    .isupper_char = yp_encoding_windows_1252_isupper_char,
    .multibyte = false
 };
--- a/yarp/enc/yp_windows_31j.c
+++ b/yarp/enc/yp_windows_31j.c
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_windows_31j = {
    .char_width = yp_encoding_windows_31j_char_width,
    .alnum_char = yp_encoding_windows_31j_alnum_char,
    .alpha_char = yp_encoding_windows_31j_alpha_char,
-    .isupper_char = yp_encoding_windows_31j_isupper_char
+    .isupper_char = yp_encoding_windows_31j_isupper_char,
    .multibyte = true
 };
--- a/yarp/extension.c
+++ b/yarp/extension.c
@ -1,6 +1,7 @@
 #include "yarp/extension.h"
 VALUE rb_cYARP;
 VALUE rb_cYARPSource;
 VALUE rb_cYARPToken;
 VALUE rb_cYARPLocation;
@ -9,51 +10,97 @@ VALUE rb_cYARPParseError;
 VALUE rb_cYARPParseWarning;
 VALUE rb_cYARPParseResult;
-// Represents a source of Ruby code. It can either be coming from a file or a
+/******************************************************************************/
-// string. If it's a file, it's going to mmap the contents of the file. If it's
+/* IO of Ruby code                                                            */
-// a string it's going to just point to the contents of the string.
+/******************************************************************************/
 // Represents an input of Ruby code. It can either be coming from a file or a
 // string. If it's a file, we'll use demand paging to read the contents of the
 // file into a string. If it's already a string, we'll reference it directly.
 typedef struct {
    // Whether this input came from a file or from a string.
    enum { SOURCE_FILE, SOURCE_STRING } type;
    // Pointer to the first byte of the Ruby code.
    const char *source;
    // Number of bytes available at source.
    size_t size;
-} source_t;
+} input_t;
 // Convert an optional filepath argument into a C string. nil maps to NULL, a
 // String maps to its C string contents, and any other object raises a type
 // error.
 static const char *
 check_filepath(VALUE filepath) {
    const char *checked = NULL;

    // A nil filepath leaves the result as NULL; only non-nil values need work.
    if (!NIL_P(filepath)) {
        // Reject anything that is not a String before touching its contents.
        if (!RB_TYPE_P(filepath, T_STRING)) {
            rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
        }

        checked = StringValueCStr(filepath);
    }

    return checked;
 }
 // Read the file indicated by the filepath parameter into source and load its
-// contents and size into the given source_t.
+// contents and size into the given input_t.
 //
 // We want to use demand paging as much as possible in order to avoid having to
 // read the entire file into memory (which could be detrimental to performance
 // for large files). This means that if we're on windows we'll use
 // `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
 // `mmap`, and on other POSIX systems we'll use `read`.
 static int
-source_file_load(source_t *source, VALUE filepath) {
+input_load_filepath(input_t *input, const char *filepath) {
 #ifdef _WIN32
-    HANDLE file = CreateFile(
+    // Open the file for reading.
-        StringValueCStr(filepath),
+    HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-        GENERIC_READ,
+
        0,
        NULL,
        OPEN_EXISTING,
        FILE_ATTRIBUTE_NORMAL,
        NULL
    );
    if (file == INVALID_HANDLE_VALUE) {
-        perror("Invalid handle for file");
+        perror("CreateFile failed");
        return 1;
    }
    // Get the file size.
    DWORD file_size = GetFileSize(file, NULL);
-    source->source = malloc(file_size);
+    if (file_size == INVALID_FILE_SIZE) {
        CloseHandle(file);
        perror("GetFileSize failed");
        return 1;
    }
-    DWORD bytes_read;
+    // If the file is empty, then we don't need to do anything else, we'll set
-    BOOL success = ReadFile(file, DISCARD_CONST_QUAL(void *, source->source), file_size, &bytes_read, NULL);
+    // the source to a constant empty string and return.
    if (!file_size) {
        CloseHandle(file);
        input->size = 0;
        input->source = "";
        return 0;
    }
    // Create a mapping of the file.
    HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
    if (mapping == NULL) {
        CloseHandle(file);
        perror("CreateFileMapping failed");
        return 1;
    }
    // Map the file into memory.
    input->source = (const char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
    CloseHandle(mapping);
    CloseHandle(file);
-    if (!success) {
+    if (input->source == NULL) {
-        perror("ReadFile failed");
+        perror("MapViewOfFile failed");
        return 1;
    }
-    source->size = (size_t) file_size;
+    // Set the size of the source.
    input->size = (size_t) file_size;
    return 0;
 #else
    // Open the file for reading
-    int fd = open(StringValueCStr(filepath), O_RDONLY);
+    int fd = open(filepath, O_RDONLY);
    if (fd == -1) {
        perror("open");
        return 1;
@ -68,30 +115,30 @@ source_file_load(source_t *source, VALUE filepath) {
    }
    // mmap the file descriptor to virtually get the contents
-    source->size = sb.st_size;
+    input->size = sb.st_size;
 #ifdef HAVE_MMAP
-    if (!source->size) {
+    if (!input->size) {
        close(fd);
-        source->source = "";
+        input->source = "";
        return 0;
    }
-    char * res = mmap(NULL, source->size, PROT_READ, MAP_PRIVATE, fd, 0);
+    const char *result = mmap(NULL, input->size, PROT_READ, MAP_PRIVATE, fd, 0);
-    if (res == MAP_FAILED) {
+    if (result == MAP_FAILED) {
        perror("Map failed");
        return 1;
    } else {
-        source->source = res;
+        input->source = result;
    }
 #else
-    source->source = malloc(source->size);
+    input->source = malloc(input->size);
-    if (source->source == NULL) return 1;
+    if (input->source == NULL) return 1;
-    ssize_t read_size = read(fd, (void *)source->source, source->size);
+    ssize_t read_size = read(fd, (void *) input->source, input->size);
-    if (read_size < 0 || (size_t)read_size != source->size) {
+    if (read_size < 0 || (size_t)read_size != input->size) {
        perror("Read size is incorrect");
-        free((void *)source->source);
+        free((void *) input->source);
        return 1;
    }
 #endif
@ -101,86 +148,106 @@ source_file_load(source_t *source, VALUE filepath) {
 #endif
 }
-// Load the contents and size of the given string into the given source_t.
+// Load the contents and size of the given string into the given input_t.
 static void
-source_string_load(source_t *source, VALUE string) {
+input_load_string(input_t *input, VALUE string) {
-    *source = (source_t) {
+    // Check if the given value is a string. If it's not, then raise a type error.
-        .type = SOURCE_STRING,
+    if (!RB_TYPE_P(string, T_STRING)) {
-        .source = RSTRING_PTR(string),
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
-        .size = RSTRING_LEN(string),
+    }
-    };
+
    // Point directly at the Ruby string's buffer rather than copying it, and
    // record its length alongside.
    input->source = RSTRING_PTR(string);
    input->size = RSTRING_LEN(string);
 }
-// Free any resources associated with the given source_t.
+// Free any resources associated with the given input_t. This is the corollary
 // function to input_load_filepath. It will unmap the file if it was mapped, or
 // free the memory if it was allocated.
 static void
-source_file_unload(source_t *source) {
+input_unload_filepath(input_t *input) {
-#ifdef _WIN32
+    // We don't need to free anything with 0 sized files because we handle that
-    free((void *)source->source);
+    // with a constant string instead.
    // NOTE(review): on builds with neither _WIN32 nor HAVE_MMAP the loader
    // appears to malloc the buffer even when the size is 0, so this early
    // return may skip the matching free — confirm.
    if (!input->size) return;
    // Drop the const qualifier so the pointer can be handed to whichever
    // release call is selected below.
    void *memory = (void *) input->source;
 #if defined(_WIN32)
    UnmapViewOfFile(memory);
 #elif defined(HAVE_MMAP)
    munmap(memory, input->size);
 #else
-#ifdef HAVE_MMAP
+    free(memory);
    munmap((void *)source->source, source->size);
 #else
    free((void *)source->source);
 #endif
 #endif
 }
-// Dump the AST corresponding to the given source to a string.
+/******************************************************************************/
 /* Serializing the AST                                                        */
 /******************************************************************************/
 // Dump the AST corresponding to the given input to a string.
 static VALUE
-dump_source(source_t *source, const char *filepath) {
+dump_input(input_t *input, const char *filepath) {
    // Set up the buffer that will receive the serialized AST. Raise a
    // NoMemoryError if it cannot be initialized.
    yp_buffer_t buffer;
    if (!yp_buffer_init(&buffer)) {
        rb_raise(rb_eNoMemError, "failed to allocate memory");
    }
    // Parse the input bytes into a tree.
    yp_parser_t parser;
-    yp_parser_init(&parser, source->source, source->size, filepath);
+    yp_parser_init(&parser, input->source, input->size, filepath);
    yp_node_t *node = yp_parse(&parser);
    yp_buffer_t buffer;
    if (!yp_buffer_init(&buffer)) rb_raise(rb_eNoMemError, "failed to allocate memory");
    // Serialize the tree and build the Ruby string from the buffer contents
    // before the buffer is freed below.
    yp_serialize(&parser, node, &buffer);
    VALUE dumped = rb_str_new(buffer.value, buffer.length);
    VALUE result = rb_str_new(buffer.value, buffer.length);
    // Release the node, the buffer, and the parser now that the Ruby string
    // has been created.
    yp_node_destroy(&parser, node);
    yp_buffer_free(&buffer);
    yp_parser_free(&parser);
-    return dumped;
+    return result;
 }
 // Dump the AST corresponding to the given string to a string.
 static VALUE
-dump(VALUE self, VALUE string, VALUE filepath) {
+dump(int argc, VALUE *argv, VALUE self) {
-    source_t source;
+    VALUE string;
-    source_string_load(&source, string);
+    VALUE filepath;
-    char *str = NULL;
    // The "11" format accepts one required argument (string) followed by one
    // optional argument (filepath).
+    rb_scan_args(argc, argv, "11", &string, &filepath);
-    if (filepath != Qnil) {
+    input_t input;
-        str = StringValueCStr(filepath);
+    input_load_string(&input, string);
-    }
    // check_filepath turns a nil filepath into NULL and raises a type error
    // for anything that is not a String.
+    return dump_input(&input, check_filepath(filepath));
    return dump_source(&source, str);
 }
 // Dump the AST corresponding to the given file to a string.
 static VALUE
 dump_file(VALUE self, VALUE filepath) {
-    source_t source;
+    input_t input;
-    if (source_file_load(&source, filepath) != 0) return Qnil;
+
    // Validate the filepath once and reuse the C string for both loading the
    // file and naming it to the parser.
    const char *checked = check_filepath(filepath);
    // A non-zero return means the file could not be loaded; report that as nil.
    if (input_load_filepath(&input, checked) != 0) return Qnil;
    VALUE value = dump_input(&input, checked);
    // Release the loaded contents only after the dump has been produced.
    input_unload_filepath(&input);
    VALUE value = dump_source(&source, StringValueCStr(filepath));
    source_file_unload(&source);
    return value;
 }
 /******************************************************************************/
 /* Extracting values for the parse result                                     */
 /******************************************************************************/
 // Extract the comments out of the parser into an array.
 static VALUE
-parser_comments(yp_parser_t *parser) {
+parser_comments(yp_parser_t *parser, VALUE source) {
    VALUE comments = rb_ary_new();
    yp_comment_t *comment;
-    for (comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
+    for (yp_comment_t *comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
-        VALUE location_argv[] = { LONG2FIX(comment->start - parser->start), LONG2FIX(comment->end - parser->start) };
+        VALUE location_argv[] = {
            source,
            LONG2FIX(comment->start - parser->start),
            LONG2FIX(comment->end - parser->start)
        };
        VALUE type;
        switch (comment->type) {
            case YP_COMMENT_INLINE:
                type = ID2SYM(rb_intern("inline"));
@ -196,7 +263,7 @@ parser_comments(yp_parser_t *parser) {
                break;
        }
-        VALUE comment_argv[] = { type, rb_class_new_instance(2, location_argv, rb_cYARPLocation) };
+        VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cYARPLocation) };
        rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment));
    }
@ -205,19 +272,20 @@ parser_comments(yp_parser_t *parser) {
 // Extract the errors out of the parser into an array.
 static VALUE
-parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
+parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
    VALUE errors = rb_ary_new();
    yp_diagnostic_t *error;
    for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) {
        VALUE location_argv[] = {
            source,
            LONG2FIX(error->start - parser->start),
            LONG2FIX(error->end - parser->start)
        };
        VALUE error_argv[] = {
            rb_enc_str_new_cstr(error->message, encoding),
-            rb_class_new_instance(2, location_argv, rb_cYARPLocation)
+            rb_class_new_instance(3, location_argv, rb_cYARPLocation)
        };
        rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError));
@ -228,19 +296,20 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
 // Extract the warnings out of the parser into an array.
 static VALUE
-parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
+parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
    VALUE warnings = rb_ary_new();
    yp_diagnostic_t *warning;
    for (warning = (yp_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (yp_diagnostic_t *) warning->node.next) {
        VALUE location_argv[] = {
            source,
            LONG2FIX(warning->start - parser->start),
            LONG2FIX(warning->end - parser->start)
        };
        VALUE warning_argv[] = {
            rb_enc_str_new_cstr(warning->message, encoding),
-            rb_class_new_instance(2, location_argv, rb_cYARPLocation)
+            rb_class_new_instance(3, location_argv, rb_cYARPLocation)
        };
        rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning));
@ -249,22 +318,36 @@ parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
    return warnings;
 }
 /******************************************************************************/
 /* Lexing Ruby code                                                           */
 /******************************************************************************/
 // This struct gets stored in the parser and passed in to the lex callback any
 // time a new token is found. We use it to store the necessary information to
 // initialize a Token instance.
 typedef struct {
    // The source object that is passed along to every token that is created.
    VALUE source;
    // The array that each [token, lex state] pair is pushed onto.
    VALUE tokens;
    // The encoding that is passed along to every token that is created.
    rb_encoding *encoding;
 } lex_data_t;
 // This is passed as a callback to the parser. It gets called every time a new
 // token is found. Once found, we initialize a new instance of Token and push it
 // onto the tokens array.
 static void
 lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
    // The data parameter is not read here; the lex data is fetched from the
    // callback registered on the parser instead.
    lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
    // Each entry is a two-element array: the token object followed by the
    // parser's lex state at the time of the callback.
    VALUE yields = rb_ary_new_capa(2);
-    rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding));
+    rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
    rb_ary_push(yields, INT2FIX(parser->lex_state));
    rb_ary_push(lex_data->tokens, yields);
 }
 // This is called whenever the encoding changes based on the magic comment at
 // the top of the file. We use it to update the encoding that we are using to
 // create tokens.
 static void
 lex_encoding_changed_callback(yp_parser_t *parser) {
    lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
@ -273,30 +356,42 @@ lex_encoding_changed_callback(yp_parser_t *parser) {
 // Return an array of tokens corresponding to the given source.
 static VALUE
-lex_source(source_t *source, char *filepath) {
+lex_input(input_t *input, const char *filepath) {
    yp_parser_t parser;
-    yp_parser_init(&parser, source->source, source->size, filepath);
+    yp_parser_init(&parser, input->source, input->size, filepath);
    yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
    VALUE offsets = rb_ary_new();
    VALUE source_argv[] = { rb_str_new(input->source, input->size), offsets };
    VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
    lex_data_t lex_data = {
        .source = source,
        .tokens = rb_ary_new(),
        .encoding = rb_utf8_encoding()
    };
-    void *data = (void *) &lex_data;
+    lex_data_t *data = &lex_data;
    yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
-        .data = data,
+        .data = (void *) data,
        .callback = lex_token,
    };
    parser.lex_callback = &lex_callback;
    yp_node_t *node = yp_parse(&parser);
    // Here we need to update the source range to have the correct newline
    // offsets. We do it here because we've already created the object and given
    // it over to all of the tokens.
    for (size_t index = 0; index < parser.newline_list.size; index++) {
        rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
    }
    VALUE result_argv[] = {
        lex_data.tokens,
-        parser_comments(&parser),
+        parser_comments(&parser, source),
-        parser_errors(&parser, lex_data.encoding),
+        parser_errors(&parser, lex_data.encoding, source),
-        parser_warnings(&parser, lex_data.encoding)
+        parser_warnings(&parser, lex_data.encoding, source)
    };
    VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
@ -309,40 +404,49 @@ lex_source(source_t *source, char *filepath) {
 // Return an array of tokens corresponding to the given string.
 static VALUE
-lex(VALUE self, VALUE string, VALUE filepath) {
+lex(int argc, VALUE *argv, VALUE self) {
-    source_t source;
+    VALUE string;
-    source_string_load(&source, string);
+    VALUE filepath;
-    char *filepath_char = NULL;
    // The "11" format accepts one required argument (string) followed by one
    // optional argument (filepath).
+    rb_scan_args(argc, argv, "11", &string, &filepath);
-    if (filepath) {
+
-        filepath_char = StringValueCStr(filepath);
+    input_t input;
-    }
+    input_load_string(&input, string);
    // check_filepath turns a nil filepath into NULL and raises a type error
    // for anything that is not a String.
-    return lex_source(&source, filepath_char);
+    return lex_input(&input, check_filepath(filepath));
 }
 // Return an array of tokens corresponding to the given file.
 static VALUE
 lex_file(VALUE self, VALUE filepath) {
-    source_t source;
+    input_t input;
-    if (source_file_load(&source, filepath) != 0) return Qnil;
+
    // Validate the filepath once and reuse the C string for both loading the
    // file and naming it to the parser.
    const char *checked = check_filepath(filepath);
    // A non-zero return means the file could not be loaded; report that as nil.
    if (input_load_filepath(&input, checked) != 0) return Qnil;
    VALUE value = lex_input(&input, checked);
    // Release the loaded contents only after lexing has finished.
    input_unload_filepath(&input);
    VALUE value = lex_source(&source, StringValueCStr(filepath));
    source_file_unload(&source);
    return value;
 }
 /******************************************************************************/
 /* Parsing Ruby code                                                          */
 /******************************************************************************/
 // Parse the given input and return a ParseResult instance.
 static VALUE
-parse_source(source_t *source, char *filepath) {
+parse_input(input_t *input, const char *filepath) {
    yp_parser_t parser;
-    yp_parser_init(&parser, source->source, source->size, filepath);
+    yp_parser_init(&parser, input->source, input->size, filepath);
    yp_node_t *node = yp_parse(&parser);
    rb_encoding *encoding = rb_enc_find(parser.encoding.name);
    VALUE source = yp_source_new(&parser);
    VALUE result_argv[] = {
        yp_ast_new(&parser, node, encoding),
-        parser_comments(&parser),
+        parser_comments(&parser, source),
-        parser_errors(&parser, encoding),
+        parser_errors(&parser, encoding, source),
-        parser_warnings(&parser, encoding)
+        parser_warnings(&parser, encoding, source)
    };
    VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
@ -353,40 +457,58 @@ parse_source(source_t *source, char *filepath) {
    return result;
 }
 // Parse the given string and return a ParseResult instance.
 static VALUE
-parse(VALUE self, VALUE string, VALUE filepath) {
+parse(int argc, VALUE *argv, VALUE self) {
-    source_t source;
+    VALUE string;
-    source_string_load(&source, string);
+    VALUE filepath;
    rb_scan_args(argc, argv, "11", &string, &filepath);
    input_t input;
    input_load_string(&input, string);
 #ifdef YARP_DEBUG_MODE_BUILD
-    char* dup = malloc(source.size);
+    char* dup = malloc(input.size);
-    memcpy(dup, source.source, source.size);
+    memcpy(dup, input.source, input.size);
-    source.source = dup;
+    input.source = dup;
 #endif
-    VALUE value = parse_source(&source, NIL_P(filepath) ? NULL : StringValueCStr(filepath));
+
    VALUE value = parse_input(&input, check_filepath(filepath));
 #ifdef YARP_DEBUG_MODE_BUILD
    free(dup);
 #endif
    return value;
 }
 // Parse the given file and return a ParseResult instance.
 static VALUE
-parse_file(VALUE self, VALUE rb_filepath) {
+parse_file(VALUE self, VALUE filepath) {
-    source_t source;
+    input_t input;
-    if (source_file_load(&source, rb_filepath) != 0) {
+
-        return Qnil;
+    const char *checked = check_filepath(filepath);
-    }
+    if (input_load_filepath(&input, checked) != 0) return Qnil;
    VALUE value = parse_input(&input, checked);
    input_unload_filepath(&input);
    VALUE value = parse_source(&source, StringValueCStr(rb_filepath));
    source_file_unload(&source);
    return value;
 }
 /******************************************************************************/
 /* Utility functions exposed to make testing easier                           */
 /******************************************************************************/
 // Returns an array of strings corresponding to the named capture groups in the
 // given source string. If YARP was unable to parse the regular expression, this
 // function returns nil.
 static VALUE
-named_captures(VALUE self, VALUE rb_source) {
+named_captures(VALUE self, VALUE source) {
    yp_string_list_t string_list;
    yp_string_list_init(&string_list);
-    if (!yp_regexp_named_capture_group_names(RSTRING_PTR(rb_source), RSTRING_LEN(rb_source), &string_list)) {
+    if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) {
        yp_string_list_free(&string_list);
        return Qnil;
    }
@ -401,6 +523,8 @@ named_captures(VALUE self, VALUE rb_source) {
    return names;
 }
 // Accepts a source string and a type of unescaping and returns the unescaped
 // version.
 static VALUE
 unescape(VALUE source, yp_unescape_type_t unescape_type) {
    yp_string_t string;
@ -409,7 +533,13 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
    yp_list_t error_list;
    yp_list_init(&error_list);
-    yp_unescape_manipulate_string(RSTRING_PTR(source), RSTRING_LEN(source), &string, unescape_type, &error_list);
+    const char *start = RSTRING_PTR(source);
    size_t length = RSTRING_LEN(source);
    yp_parser_t parser;
    yp_parser_init(&parser, start, length, "");
    yp_unescape_manipulate_string(&parser, start, length, &string, unescape_type, &error_list);
    if (yp_list_empty_p(&error_list)) {
        result = rb_str_new(yp_string_source(&string), yp_string_length(&string));
    } else {
@ -418,27 +548,32 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
    yp_string_free(&string);
    yp_list_free(&error_list);
    yp_parser_free(&parser);
    return result;
 }
 // Do not unescape anything in the given string. This is here to provide a
 // consistent API.
 static VALUE
 unescape_none(VALUE self, VALUE source) {
    return unescape(source, YP_UNESCAPE_NONE);
 }
 // Minimally unescape the given string. This means effectively unescaping just
 // the quotes of a string. Returns the unescaped string.
 static VALUE
 unescape_minimal(VALUE self, VALUE source) {
    return unescape(source, YP_UNESCAPE_MINIMAL);
 }
 // Unescape everything in the given string. Return the unescaped string.
 static VALUE
 unescape_all(VALUE self, VALUE source) {
    return unescape(source, YP_UNESCAPE_ALL);
 }
-// This function returns a hash of information about the given source string's
+// Return a hash of information about the given source string's memory usage.
 // memory usage.
 static VALUE
 memsize(VALUE self, VALUE string) {
    yp_parser_t parser;
@ -459,28 +594,17 @@ memsize(VALUE self, VALUE string) {
    return result;
 }
-static VALUE
+// Parse the file, but do nothing with the result. This is used to profile the
-compile(VALUE self, VALUE string) {
+// parser for memory and speed.
    yp_parser_t parser;
    size_t length = RSTRING_LEN(string);
    yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
    yp_node_t *node = yp_parse(&parser);
    VALUE result = yp_compile(node);
    yp_node_destroy(&parser, node);
    yp_parser_free(&parser);
    return result;
 }
 static VALUE
 profile_file(VALUE self, VALUE filepath) {
-    source_t source;
+    input_t input;
-    if (source_file_load(&source, filepath) != 0) return Qnil;
+
    const char *checked = check_filepath(filepath);
    if (input_load_filepath(&input, checked) != 0) return Qnil;
    yp_parser_t parser;
-    yp_parser_init(&parser, source.source, source.size, StringValueCStr(filepath));
+    yp_parser_init(&parser, input.source, input.size, checked);
    yp_node_t *node = yp_parse(&parser);
    yp_node_destroy(&parser, node);
@ -491,9 +615,8 @@ profile_file(VALUE self, VALUE filepath) {
 // The function takes a source string and returns a Ruby array containing the
 // offsets of every newline in the string. (It also includes a 0 at the
-// beginning to indicate the position of the first line.)
+// beginning to indicate the position of the first line.) It accepts a string as
-//
+// its only argument and returns an array of integers.
 // It accepts a string as its only argument and returns an array of integers.
 static VALUE
 newlines(VALUE self, VALUE string) {
    yp_parser_t parser;
@ -512,46 +635,56 @@ newlines(VALUE self, VALUE string) {
    return result;
 }
 /******************************************************************************/
 /* Initialization of the extension                                            */
 /******************************************************************************/
 RUBY_FUNC_EXPORTED void
 Init_yarp(void) {
    // Make sure that the YARP library version matches the expected version.
    // Otherwise something was compiled incorrectly.
    if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) {
-        rb_raise(rb_eRuntimeError, "The YARP library version (%s) does not match the expected version (%s)", yp_version(),
+        rb_raise(
-                         EXPECTED_YARP_VERSION);
+            rb_eRuntimeError,
            "The YARP library version (%s) does not match the expected version (%s)",
            yp_version(),
            EXPECTED_YARP_VERSION
        );
    }
    // Grab up references to all of the constants that we're going to need to
    // reference throughout this extension.
    rb_cYARP = rb_define_module("YARP");
    rb_cYARPSource = rb_define_class_under(rb_cYARP, "Source", rb_cObject);
    rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject);
    rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject);
    rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject);
    rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject);
    rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject);
    rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject);
-    rb_define_const(rb_cYARP, "VERSION", rb_sprintf("%d.%d.%d", YP_VERSION_MAJOR, YP_VERSION_MINOR, YP_VERSION_PATCH));
+    // Define the version string here so that we can use the constants defined
    // in yarp.h.
    rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));
-    rb_define_singleton_method(rb_cYARP, "dump", dump, 2);
+    // First, the functions that have to do with lexing and parsing.
    rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
    rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
-
+    rb_define_singleton_method(rb_cYARP, "lex", lex, -1);
    rb_define_singleton_method(rb_cYARP, "lex", lex, 2);
    rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
-
+    rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
    rb_define_singleton_method(rb_cYARP, "_parse", parse, 2);
    rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
    // Next, the functions that will be called by the parser to perform various
    // internal tasks. We expose these to make them easier to test.
    rb_define_singleton_method(rb_cYARP, "named_captures", named_captures, 1);
    rb_define_singleton_method(rb_cYARP, "unescape_none", unescape_none, 1);
    rb_define_singleton_method(rb_cYARP, "unescape_minimal", unescape_minimal, 1);
    rb_define_singleton_method(rb_cYARP, "unescape_all", unescape_all, 1);
    rb_define_singleton_method(rb_cYARP, "memsize", memsize, 1);
    rb_define_singleton_method(rb_cYARP, "compile", compile, 1);
    rb_define_singleton_method(rb_cYARP, "profile_file", profile_file, 1);
    rb_define_singleton_method(rb_cYARP, "newlines", newlines, 1);
    // Next, initialize the pack API.
    Init_yarp_pack();
 }
--- a/yarp/extension.h
+++ b/yarp/extension.h
@ -5,11 +5,11 @@
 #include <ruby/encoding.h>
 #include "yarp.h"
-#include <fcntl.h>
+// The following headers are necessary to read files using demand paging.
 #ifdef _WIN32
 #include <windows.h>
 #else
 #include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <unistd.h>
@ -17,16 +17,11 @@
 #define EXPECTED_YARP_VERSION "0.4.0"
-VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding);
+VALUE yp_source_new(yp_parser_t *parser);
-
+VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
 VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);
 VALUE yp_compile(yp_node_t *node);
 void Init_yarp_pack(void);
 YP_EXPORTED_FUNCTION void Init_yarp(void);
-#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
+#endif
 #endif // YARP_EXT_NODE_H
--- a/yarp/missing.h
+++ b/yarp/missing.h
@ -1,20 +0,0 @@
 #ifndef YARP_MISSING_H
 #define YARP_MISSING_H
 #include "yarp/defines.h"
 #include <ctype.h>
 #include <stddef.h>
 #include <string.h>
 const char * yp_strnstr(const char *haystack, const char *needle, size_t length);
 int yp_strncasecmp(const char *string1, const char *string2, size_t length);
 #ifndef HAVE_STRNCASECMP
 #ifndef strncasecmp
 #define strncasecmp yp_strncasecmp
 #endif
 #endif
 #endif
--- a/yarp/node.h
+++ b/yarp/node.h
@ -2,8 +2,6 @@
 #define YARP_NODE_H
 #include "yarp/defines.h"
 #include "yarp.h"
 #include "yarp/parser.h"
 // Append a token to the given list.
@ -15,6 +13,20 @@ void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
 // Clear the node but preserves the location.
 void yp_node_clear(yp_node_t *node);
 // Deallocate a node and all of its children.
 YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
 // This struct stores the information gathered by the yp_node_memsize function.
 // It contains both the memory footprint and additionally metadata about the
 // shape of the tree.
 typedef struct {
    size_t memsize;
    size_t node_count;
 } yp_memsize_t;
 // Calculates the memory footprint of a given node.
 YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
 #define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
 #define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })
--- a/yarp/pack.h
+++ b/yarp/pack.h
@ -3,8 +3,8 @@
 #include "yarp/defines.h"
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdlib.h>
 typedef enum yp_pack_version {
    YP_PACK_VERSION_3_2_0
--- a/yarp/parser.h
+++ b/yarp/parser.h
@ -1,17 +1,16 @@
 #ifndef YARP_PARSER_H
 #define YARP_PARSER_H
 #include "yarp/defines.h"
 #include <stdbool.h>
 #include "yarp/ast.h"
 #include "yarp/defines.h"
 #include "yarp/enc/yp_encoding.h"
 #include "yarp/util/yp_constant_pool.h"
 #include "yarp/util/yp_list.h"
 #include "yarp/util/yp_newline_list.h"
 #include "yarp/util/yp_state_stack.h"
 #include <stdbool.h>
 // This enum provides various bits that represent different kinds of states that
 // the lexer can track. This is used to determine which kind of token to return
 // based on the context of the parser.
--- a/yarp/prettyprint.c
+++ b/yarp/prettyprint.c
@ -5,6 +5,8 @@
 /* if you are looking to modify the                                           */
 /* template                                                                   */
 /******************************************************************************/
 #include "yarp/defines.h"
 #include <stdio.h>
 #include "yarp/ast.h"
@ -14,7 +16,7 @@
 static void
 prettyprint_location(yp_buffer_t *buffer, yp_parser_t *parser, yp_location_t *location) {
    char printed[] = "[0000-0000]";
-    sprintf(printed, "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
+    yp_snprintf(printed, sizeof(printed), "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
    yp_buffer_append_str(buffer, printed, strlen(printed));
 }
@ -189,7 +191,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_block_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_block_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_block_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            if (((yp_block_node_t *)node)->parameters == NULL) {
@ -291,7 +293,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_node_t *)node)->block);
            }
            yp_buffer_append_str(buffer, ", ", 2);            char flags_buffer[12];
-            sprintf(flags_buffer, "+%d", ((yp_call_node_t *)node)->flags);
+            yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_call_node_t *)node)->flags);
            yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            yp_buffer_append_str(buffer, "\"", 1);
            yp_buffer_append_str(buffer, yp_string_source(&((yp_call_node_t *)node)->name), yp_string_length(&((yp_call_node_t *)node)->name));
@ -321,7 +323,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_call_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_id_buffer[12];
-            sprintf(operator_id_buffer, "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
+            yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
            yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -360,7 +362,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_class_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_class_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_class_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_class_node_t *)node)->class_keyword_loc);
@ -406,7 +408,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_class_variable_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_class_variable_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -454,7 +456,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_constant_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_constant_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -493,7 +495,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_constant_path_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_path_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -540,7 +542,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            for (uint32_t index = 0; index < ((yp_def_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_def_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_def_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_def_node_t *)node)->def_keyword_loc);
@ -734,7 +736,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_global_variable_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_global_variable_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -871,7 +873,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_instance_variable_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_instance_variable_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -911,7 +913,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_interpolated_regular_expression_node_t *)node)->closing_loc);
            yp_buffer_append_str(buffer, ", ", 2);            char flags_buffer[12];
-            sprintf(flags_buffer, "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
+            yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
            yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1001,7 +1003,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_lambda_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_lambda_node_t *)node)->opening_loc);
@ -1024,7 +1026,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_local_variable_operator_and_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_and_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1035,7 +1037,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_local_variable_operator_or_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_or_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1046,10 +1048,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_local_variable_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            char operator_id_buffer[12];
-            sprintf(operator_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
+            yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
            yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1057,10 +1059,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
        case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
            yp_buffer_append_str(buffer, "LocalVariableReadNode(", 22);
                        char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            char depth_buffer[12];
-            sprintf(depth_buffer, "+%d", ((yp_local_variable_read_node_t *)node)->depth);
+            yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_read_node_t *)node)->depth);
            yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1068,10 +1070,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
        case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
            yp_buffer_append_str(buffer, "LocalVariableWriteNode(", 23);
                        char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            char depth_buffer[12];
-            sprintf(depth_buffer, "+%d", ((yp_local_variable_write_node_t *)node)->depth);
+            yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_write_node_t *)node)->depth);
            yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            if (((yp_local_variable_write_node_t *)node)->value == NULL) {
                yp_buffer_append_str(buffer, "nil", 3);
@ -1113,7 +1115,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_module_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_module_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_module_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_module_node_t *)node)->module_keyword_loc);
@ -1187,7 +1189,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
        case YP_NODE_OPTIONAL_PARAMETER_NODE: {
            yp_buffer_append_str(buffer, "OptionalParameterNode(", 22);
                        char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->name_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->operator_loc);
@ -1298,7 +1300,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_program_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_program_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_program_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_program_node_t *)node)->statements);
@ -1319,7 +1321,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_range_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            char flags_buffer[12];
-            sprintf(flags_buffer, "+%d", ((yp_range_node_t *)node)->flags);
+            yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_range_node_t *)node)->flags);
            yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1344,7 +1346,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, yp_string_source(&((yp_regular_expression_node_t *)node)->unescaped), yp_string_length(&((yp_regular_expression_node_t *)node)->unescaped));
            yp_buffer_append_str(buffer, "\"", 1);
            yp_buffer_append_str(buffer, ", ", 2);            char flags_buffer[12];
-            sprintf(flags_buffer, "+%d", ((yp_regular_expression_node_t *)node)->flags);
+            yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_regular_expression_node_t *)node)->flags);
            yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1363,7 +1365,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
        case YP_NODE_REQUIRED_PARAMETER_NODE: {
            yp_buffer_append_str(buffer, "RequiredParameterNode(", 22);
                        char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_required_parameter_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_required_parameter_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1443,7 +1445,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_singleton_class_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_singleton_class_node_t *)node)->class_keyword_loc);
--- a/yarp/regexp.c
+++ b/yarp/regexp.c
@ -374,7 +374,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
            case '#': { // inline comments
                bool found = yp_regexp_char_find(parser, ')');
                // the close paren we found is escaped, we need to find another
-                while (parser->start <= parser->cursor - 2 && *(parser->cursor - 2) == '\\') {
+                while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
                    found = yp_regexp_char_find(parser, ')');
                }
                return found;
--- a/yarp/regexp.h
+++ b/yarp/regexp.h
@ -2,15 +2,14 @@
 #define YARP_REGEXP_H
 #include "yarp/defines.h"
 #include "yarp/parser.h"
 #include "yarp/util/yp_string_list.h"
 #include "yarp/util/yp_string.h"
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 #include "yarp/util/yp_string_list.h"
 #include "yarp/util/yp_string.h"
 // Parse a regular expression and extract the names of all of the named capture
 // groups.
 YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures);
--- a/yarp/unescape.c
+++ b/yarp/unescape.c
@ -438,14 +438,14 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
 // \c? or \C-?    delete, ASCII 7Fh (DEL)
 //
 YP_EXPORTED_FUNCTION void
-yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
+yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
    if (unescape_type == YP_UNESCAPE_NONE) {
        // If we're not unescaping then we can reference the source directly.
        yp_string_shared_init(string, value, value + length);
        return;
    }
-    const char *backslash = memchr(value, '\\', length);
+    const char *backslash = yp_memchr(parser, value, '\\', length);
    if (backslash == NULL) {
        // Here there are no escapes, so we can reference the source directly.
@ -509,7 +509,7 @@ yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *str
        }
        if (end > cursor) {
-            backslash = memchr(cursor, '\\', (size_t) (end - cursor));
+            backslash = yp_memchr(parser, cursor, '\\', (size_t) (end - cursor));
        } else {
            backslash = NULL;
        }
--- a/yarp/unescape.h
+++ b/yarp/unescape.h
@ -2,17 +2,18 @@
 #define YARP_UNESCAPE_H
 #include "yarp/defines.h"
 #include "yarp/diagnostic.h"
 #include "yarp/parser.h"
 #include "yarp/util/yp_char.h"
 #include "yarp/util/yp_list.h"
 #include "yarp/util/yp_memchr.h"
 #include "yarp/util/yp_string.h"
 #include <assert.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <string.h>
 #include "yarp/diagnostic.h"
 #include "yarp/util/yp_char.h"
 #include "yarp/util/yp_list.h"
 #include "yarp/util/yp_string.h"
 // The type of unescape we are performing.
 typedef enum {
    // When we're creating a string inside of a list literal like %w, we
@ -30,7 +31,7 @@ typedef enum {
 // Unescape the contents of the given token into the given string using the
 // given unescape mode.
-YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
+YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
 YP_EXPORTED_FUNCTION size_t yp_unescape_calculate_difference(const char *value, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list);
--- a/yarp/util/yp_char.h
+++ b/yarp/util/yp_char.h
@ -2,12 +2,11 @@
 #define YP_CHAR_H
 #include "yarp/defines.h"
 #include "yarp/util/yp_newline_list.h"
 #include <stdbool.h>
 #include <stddef.h>
 #include "yarp/util/yp_newline_list.h"
 // Returns the number of characters at the start of the string that are
 // whitespace. Disallows searching past the given maximum number of characters.
 size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);
--- a/yarp/util/yp_constant_pool.h
+++ b/yarp/util/yp_constant_pool.h
@ -6,13 +6,13 @@
 #ifndef YP_CONSTANT_POOL_H
 #define YP_CONSTANT_POOL_H
 #include "yarp/defines.h"
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include "yarp/defines.h"
 typedef uint32_t yp_constant_id_t;
 typedef struct {
--- a/yarp/util/yp_memchr.c
+++ b/yarp/util/yp_memchr.c
@ -0,0 +1,31 @@
 #include "yarp/util/yp_memchr.h"
 #define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
 // We need to roll our own memchr to handle cases where the encoding changes and
 // we need to search for a character in a buffer that could be the trailing byte
 // of a multibyte character.
 //
 // Searches the first `number` bytes of `memory` for `character` and returns a
 // pointer to the first matching byte, or NULL if there is none. When the
 // parser's encoding has changed to a multibyte encoding and `character` is at
 // or above YP_MEMCHR_TRAILING_BYTE_MINIMUM, the buffer is walked one encoded
 // character at a time so that only the first byte of each character is
 // compared. In every other case this simply defers to memchr.
 void *
 yp_memchr(yp_parser_t *parser, const void *memory, int character, size_t number) {
    if (parser->encoding_changed && parser->encoding.multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
        const char *source = (const char *) memory;
        // memchr compares bytes as unsigned char. Do the same here so that
        // values of 0x80 and above still match on platforms where plain char
        // is signed.
        const unsigned char target = (unsigned char) character;
        size_t index = 0;
        while (index < number) {
            if ((unsigned char) source[index] == target) {
                return (void *) (source + index);
            }
            size_t width = parser->encoding.char_width(source + index);
            // A width of 0 would never advance the index, so stop searching
            // instead of looping forever.
            if (width == 0) {
                return NULL;
            }
            index += width;
        }
        return NULL;
    } else {
        return memchr(memory, character, number);
    }
 }
--- a/yarp/util/yp_memchr.h
+++ b/yarp/util/yp_memchr.h
@ -0,0 +1,14 @@
 #ifndef YP_MEMCHR_H
 #define YP_MEMCHR_H
 #include "yarp/defines.h"
 #include "yarp/parser.h"
 #include <stddef.h>
 // We need to roll our own memchr to handle cases where the encoding changes and
 // we need to search for a character in a buffer that could be the trailing byte
 // of a multibyte character.
 void * yp_memchr(yp_parser_t *parser, const void *source, int character, size_t number);
 #endif
--- a/yarp/util/yp_newline_list.c
+++ b/yarp/util/yp_newline_list.c
@ -31,7 +31,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
    }
    assert(cursor >= list->start);
-    list->offsets[list->size++] = (size_t) (cursor - list->start);
+    list->offsets[list->size++] = (size_t) (cursor - list->start + 1);
    return true;
 }
--- a/yarp/util/yp_newline_list.h
+++ b/yarp/util/yp_newline_list.h
@ -9,13 +9,13 @@
 #ifndef YP_NEWLINE_LIST_H
 #define YP_NEWLINE_LIST_H
 #include <assert.h>
 #include <stddef.h>
 #include <stdbool.h>
 #include <stdlib.h>
 #include "yarp/defines.h"
 #include <assert.h>
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdlib.h>
 // A list of offsets of newlines in a string. The offsets are assumed to be
 // sorted/inserted in ascending order.
 typedef struct {
--- a/yarp/util/yp_snprintf.c
+++ b/yarp/util/yp_snprintf.c
@ -0,0 +1,14 @@
 #include "yarp/defines.h"
 #ifndef HAVE_SNPRINTF
 // In case snprintf isn't present on the system, we provide our own that simply
 // forwards to the less-safe sprintf.
 //
 // Note that the size argument is unused (it is marked YP_ATTRIBUTE_UNUSED):
 // vsprintf performs no bounds checking, so dest must already be large enough
 // to hold the formatted output. The return value is whatever vsprintf returns.
 int
 yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...) {
    va_list args;
    va_start(args, format);
    int result = vsprintf(dest, format, args);
    va_end(args);
    return result;
 }
 #endif
--- a/yarp/util/yp_string.c
+++ b/yarp/util/yp_string.c
@ -1,11 +1,5 @@
 #include "yarp/util/yp_string.h"
 // Allocate a new yp_string_t.
 yp_string_t *
 yp_string_alloc(void) {
    return (yp_string_t *) malloc(sizeof(yp_string_t));
 }
 // Initialize a shared string that is based on initial input.
 void
 yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
--- a/yarp/util/yp_string.h
+++ b/yarp/util/yp_string.h
@ -29,9 +29,6 @@ typedef struct {
    } as;
 } yp_string_t;
 // Allocate a new yp_string_t.
 yp_string_t * yp_string_alloc(void);
 // Initialize a shared string that is based on initial input.
 void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);
--- a/yarp/util/yp_string_list.h
+++ b/yarp/util/yp_string_list.h
@ -2,12 +2,11 @@
 #define YARP_STRING_LIST_H
 #include "yarp/defines.h"
 #include "yarp/util/yp_string.h"
 #include <stddef.h>
 #include <stdlib.h>
 #include "yarp/util/yp_string.h"
 typedef struct {
    yp_string_t *strings;
    size_t length;
--- a/yarp/util/yp_strncasecmp.c
+++ b/yarp/util/yp_strncasecmp.c
@ -1,19 +1,5 @@
-#include "yarp/missing.h"
+#include <ctype.h>
-
+#include <stddef.h>
 const char *
 yp_strnstr(const char *haystack, const char *needle, size_t length) {
    size_t needle_length = strlen(needle);
    if (needle_length > length) return NULL;
    const char *haystack_limit = haystack + length - needle_length + 1;
    while ((haystack = memchr(haystack, needle[0], (size_t) (haystack_limit - haystack))) != NULL) {
        if (!strncmp(haystack, needle, needle_length)) return haystack;
        haystack++;
    }
    return NULL;
 }
 int
 yp_strncasecmp(const char *string1, const char *string2, size_t length) {
--- a/yarp/util/yp_strpbrk.c
+++ b/yarp/util/yp_strpbrk.c
@ -1,5 +1,42 @@
 #include "yarp/util/yp_strpbrk.h"
 // Encoding-aware slow path: steps through the source one encoded character at
 // a time, testing only the first byte of each character against the charset.
 // Returns a pointer to the first match, or NULL if none is found within
 // maximum bytes or if the encoding reports a character width of 0.
 static inline const char *
 yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
    for (size_t offset = 0, width; offset < maximum; offset += width) {
        const char *current = source + offset;
        if (strchr(charset, *current) != NULL) return current;
        // A width of 0 means we cannot advance, so give up on the search.
        width = parser->encoding.char_width(current);
        if (width == 0) break;
    }
    return NULL;
 }
 // Encoding-agnostic fast path: checks every byte in turn against the charset.
 // Returns a pointer to the first byte found in the charset, or NULL if none of
 // the first maximum bytes match.
 static inline const char *
 yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
    for (const char *cursor = source, *limit = source + maximum; cursor != limit; cursor++) {
        if (strchr(charset, *cursor) != NULL) return cursor;
    }
    return NULL;
 }
 // Here we have rolled our own version of strpbrk. The standard library strpbrk
 // has undefined behavior when the source string is not null-terminated. We want
 // to support strings that are not null-terminated because yp_parse does not
@ -12,19 +49,18 @@
 // also don't want it to stop on null bytes. Ruby actually allows null bytes
 // within strings, comments, regular expressions, etc. So we need to be able to
 // skip past them.
 //
 // Finally, we want to support encodings wherein the charset could contain
 // characters that are trailing bytes of multi-byte characters. For example, in
 // Shift-JIS, the backslash character can be a trailing byte. In that case we
 // need to take a slower path and iterate one multi-byte character at a time.
 const char *
-yp_strpbrk(const char *source, const char *charset, ptrdiff_t length) {
+yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
-    if (length < 0) return NULL;
+    if (length <= 0) {
-
+        return NULL;
-    size_t index = 0;
+    } else if (parser->encoding_changed && parser->encoding.multibyte) {
-    size_t maximum = (size_t) length;
+        return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
-
+    } else {
-    while (index < maximum) {
+        return yp_strpbrk_single_byte(source, charset, (size_t) length);
        if (strchr(charset, source[index]) != NULL) {
            return &source[index];
        }
        index++;
    }
    return NULL;
 }
--- a/yarp/util/yp_strpbrk.h
+++ b/yarp/util/yp_strpbrk.h
@ -2,6 +2,7 @@
 #define YP_STRPBRK_H
 #include "yarp/defines.h"
 #include "yarp/parser.h"
 #include <stddef.h>
 #include <string.h>
@ -18,6 +19,11 @@
 // also don't want it to stop on null bytes. Ruby actually allows null bytes
 // within strings, comments, regular expressions, etc. So we need to be able to
 // skip past them.
-const char * yp_strpbrk(const char *source, const char *charset, ptrdiff_t length);
+//
 // Finally, we want to support encodings wherein the charset could contain
 // characters that are trailing bytes of multi-byte characters. For example, in
 // Shift-JIS, the backslash character can be a trailing byte. In that case we
 // need to take a slower path and iterate one multi-byte character at a time.
 const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);
 #endif
--- a/yarp/version.h
+++ b/yarp/version.h
@ -0,0 +1,5 @@
 #define YP_VERSION_MAJOR 0
 #define YP_VERSION_MINOR 4
 #define YP_VERSION_PATCH 0
 #define YP_VERSION "0.4.0"
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@ -1,16 +1,19 @@
 #include "yarp.h"
 #include "yarp/version.h"
-#define YP_STRINGIZE0(expr) #expr
+// The YARP version and the serialization format.
 #define YP_STRINGIZE(expr) YP_STRINGIZE0(expr)
 #define YP_VERSION_MACRO YP_STRINGIZE(YP_VERSION_MAJOR) "." YP_STRINGIZE(YP_VERSION_MINOR) "." YP_STRINGIZE(YP_VERSION_PATCH)
 #define YP_TAB_WHITESPACE_SIZE 8
 const char *
 yp_version(void) {
-    return YP_VERSION_MACRO;
+    return YP_VERSION;
 }
 // In heredocs, tabs automatically complete up to the next 8 spaces. This is
 // defined in CRuby as TAB_WIDTH.
 #define YP_TAB_WHITESPACE_SIZE 8
 // Debugging logging will provide you with additional debugging functions as
 // well as automatically replace some functions with their debugging
 // counterparts.
 #ifndef YP_DEBUG_LOGGING
 #define YP_DEBUG_LOGGING 0
 #endif
@ -442,6 +445,7 @@ not_provided(yp_parser_t *parser) {
    return (yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
 }
 #define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_SHARED, .as.shared.start = NULL, .as.shared.end = NULL })
 #define YP_LOCATION_NULL_VALUE(parser) ((yp_location_t) { .start = parser->start, .end = parser->start })
 #define YP_LOCATION_TOKEN_VALUE(token) ((yp_location_t) { .start = (token)->start, .end = (token)->end })
 #define YP_LOCATION_NODE_VALUE(node) ((yp_location_t) { .start = (node)->location.start, .end = (node)->location.end })
@ -675,7 +679,9 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
        .constant = NULL,
        .rest = NULL,
        .requireds = YP_EMPTY_NODE_LIST,
-        .posts = YP_EMPTY_NODE_LIST
+        .posts = YP_EMPTY_NODE_LIST,
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };
    // For now we're going to just copy over each pointer manually. This could be
@ -684,7 +690,7 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
    for (size_t index = 0; index < nodes->size; index++) {
        yp_node_t *child = nodes->nodes[index];
-        if (child->type == YP_NODE_SPLAT_NODE) {
+        if (!found_rest && child->type == YP_NODE_SPLAT_NODE) {
            node->rest = child;
            found_rest = true;
        } else if (found_rest) {
@ -710,7 +716,9 @@ yp_array_pattern_node_rest_create(yp_parser_t *parser, yp_node_t *rest) {
        .constant = NULL,
        .rest = rest,
        .requireds = YP_EMPTY_NODE_LIST,
-        .posts = YP_EMPTY_NODE_LIST
+        .posts = YP_EMPTY_NODE_LIST,
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };
    return node;
@ -1885,7 +1893,9 @@ yp_find_pattern_node_create(yp_parser_t *parser, yp_node_list_t *nodes) {
        .constant = NULL,
        .left = left,
        .right = right,
-        .requireds = YP_EMPTY_NODE_LIST
+        .requireds = YP_EMPTY_NODE_LIST,
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };
    // For now we're going to just copy over each pointer manually. This could be
@ -2018,7 +2028,9 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
        },
        .constant = NULL,
        .kwrest = NULL,
-        .assocs = YP_EMPTY_NODE_LIST
+        .assocs = YP_EMPTY_NODE_LIST,
        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };
    for (size_t index = 0; index < assocs->size; index++) {
@ -3709,7 +3721,8 @@ yp_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .content_loc = YP_LOCATION_TOKEN_VALUE(content),
-        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
+        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
        .unescaped = YP_EMPTY_STRING
    };
    return node;
@ -3766,7 +3779,8 @@ yp_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .value_loc = YP_LOCATION_TOKEN_VALUE(value),
-        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
+        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
        .unescaped = YP_EMPTY_STRING
    };
    return node;
@ -3788,7 +3802,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
            ptrdiff_t length = label.end - label.start;
            assert(length >= 0);
-            yp_unescape_manipulate_string(label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
+            yp_unescape_manipulate_string(parser, label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
            break;
        }
        case YP_TOKEN_MISSING: {
@ -4073,7 +4087,8 @@ yp_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .content_loc = YP_LOCATION_TOKEN_VALUE(content),
-        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
+        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
        .unescaped = YP_EMPTY_STRING
    };
    return node;
@ -4113,6 +4128,7 @@ yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_lo
 }
 #undef YP_EMPTY_STRING
 #undef YP_LOCATION_NULL_VALUE
 #undef YP_LOCATION_TOKEN_VALUE
 #undef YP_LOCATION_NODE_VALUE
@ -4331,6 +4347,17 @@ peek(yp_parser_t *parser) {
    }
 }
 // Get the next string of length len in the source starting from
 // parser->current.end. If fewer than len bytes remain before parser->end, the
 // empty string "" is returned instead.
 static inline const char *
 peek_string(yp_parser_t *parser, size_t len) {
    const char *cursor = parser->current.end;
    // Compare len against the number of remaining bytes rather than computing
    // cursor + len, because forming a pointer past the end of the source buffer
    // is undefined behavior and could wrap around for very large values of len.
    if (cursor <= parser->end && len <= (size_t) (parser->end - cursor)) {
        return cursor;
    } else {
        return "";
    }
 }
 // If the character to be read matches the given value, then returns true and
 // advances the current pointer.
 static inline bool
@ -4342,22 +4369,53 @@ match(yp_parser_t *parser, char value) {
    return false;
 }
 // Find the next newline character within the length bytes starting at cursor.
 // Returns a pointer to that newline, or NULL if there is no newline in the
 // range. NUL bytes are not treated specially.
 static inline const char *
 next_newline(const char *cursor, ptrdiff_t length) {
    assert(length >= 0);
    // Note that it's okay for us to use memchr here to look for \n because none
    // of the encodings that we support have \n as a component of a multi-byte
    // character.
    return memchr(cursor, '\n', (size_t) length);
 }
 // Locate the value of an encoding magic comment. Scans the remaining bytes
 // starting at cursor for the word "coding" immediately followed by ':' or '='
 // and returns a pointer to the byte just after that separator, or NULL if no
 // such key is present. This is effectively an inlined strnstr that accepts
 // either separator.
 static inline const char *
 parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
    assert(remaining >= 0);
    const size_t available = (size_t) remaining;
    const size_t word_length = strlen("coding");
    // The key is the word plus a one-byte separator, so anything shorter than
    // that cannot contain it.
    if (word_length + 1 > available) return NULL;
    // One past the last position at which the full key could still fit.
    const char *limit = cursor + available - word_length;
    for (
        const char *match = yp_memchr(parser, cursor, 'c', (size_t) (limit - cursor));
        match != NULL;
        match = yp_memchr(parser, match + 1, 'c', (size_t) (limit - (match + 1)))
    ) {
        if (strncmp(match, "coding", word_length) != 0) continue;
        const char separator = match[word_length];
        if (separator == ':' || separator == '=') return match + word_length + 1;
    }
    return NULL;
 }
 // Here we're going to check if this is a "magic" comment, and perform whatever
 // actions are necessary for it here.
 static void
 parser_lex_encoding_comment(yp_parser_t *parser) {
    const char *start = parser->current.start + 1;
-    const char *end = memchr(start, '\n', (size_t) (parser->end - start));
+    const char *end = next_newline(start, parser->end - start);
    if (end == NULL) end = parser->end;
    // These are the patterns we're going to match to find the encoding comment.
    // This is definitely not complete or even really correct.
-    const char *encoding_start = NULL;
+    const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
    if ((encoding_start = yp_strnstr(start, "coding:", (size_t) (end - start))) != NULL) {
        encoding_start += 7;
    } else if ((encoding_start = yp_strnstr(start, "coding=", (size_t) (end - start))) != NULL) {
        encoding_start += 7;
    }
    // If we didn't find anything that matched our patterns, then return. Note
    // that this does a _very_ poor job of actually finding the encoding, and
@ -4370,7 +4428,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
    // Now determine the end of the encoding string. This is either the end of
    // the line, the first whitespace character, or a punctuation mark.
-    const char *encoding_end = yp_strpbrk(encoding_start, " \t\f\r\v\n;,", end - encoding_start);
+    const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
    encoding_end = encoding_end == NULL ? end : encoding_end;
    // Finally, we can determine the width of the encoding string.
@ -4392,7 +4450,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
    // Encoding names like utf-8 can carry extra details, such as utf-8-dos,
    // utf-8-linux, or utf-8-mac. We treat any encoding name that starts with
    // utf-8 as utf-8.
-    if (strncasecmp(encoding_start, "utf-8", 5) == 0) {
+    if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
        // We don't need to do anything here because the default encoding is
        // already UTF-8. We'll just return.
        return;
@ -4401,7 +4459,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
    // Next, we're going to loop through each of the encodings that we handle
    // explicitly. If we found one that we understand, we'll use that value.
 #define ENCODING(value, prebuilt) \
-    if (width == sizeof(value) - 1 && strncasecmp(encoding_start, value, sizeof(value) - 1) == 0) { \
+    if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
        parser->encoding = prebuilt; \
        parser->encoding_changed |= true; \
        if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@ -4866,7 +4924,8 @@ static yp_token_type_t
 lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_token_type_t type, yp_token_type_t modifier_type) {
    yp_lex_state_t last_state = parser->lex_state;
-    if (strncmp(parser->current.start, value, strlen(value)) == 0) {
+    const size_t vlen = strlen(value);
    if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
        if (parser->lex_state & YP_LEX_STATE_FNAME) {
            lex_state_set(parser, YP_LEX_STATE_ENDFN);
        } else {
@ -5275,7 +5334,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
 static yp_token_type_t
 lex_embdoc(yp_parser_t *parser) {
    // First, lex out the EMBDOC_BEGIN token.
-    const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+    const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
    if (newline == NULL) {
        parser->current.end = parser->end;
@ -5300,7 +5359,7 @@ lex_embdoc(yp_parser_t *parser) {
        // token here.
        if (strncmp(parser->current.end, "=end", 4) == 0 &&
                (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
-            const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+            const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
            if (newline == NULL) {
                parser->current.end = parser->end;
@ -5320,7 +5379,7 @@ lex_embdoc(yp_parser_t *parser) {
        // Otherwise, we'll parse until the end of the line and return a line of
        // embedded documentation.
-        const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+        const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);
        if (newline == NULL) {
            parser->current.end = parser->end;
@ -5466,9 +5525,9 @@ parser_lex(yp_parser_t *parser) {
                    LEX(YP_TOKEN_EOF);
                case '#': { // comments
-                    const char *ending = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+                    const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
                    while (ending && ending < parser->end && *ending != '\n') {
-                        ending = memchr(ending + 1, '\n', (size_t) (parser->end - ending));
+                        ending = next_newline(ending + 1, parser->end - ending);
                    }
                    parser->current.end = ending == NULL ? parser->end : ending + 1;
@ -5540,7 +5599,7 @@ parser_lex(yp_parser_t *parser) {
                        // Otherwise we'll return a regular newline.
                        if (next_content[0] == '#') {
                            // Here we look for a "." or "&." following a "\n".
-                            const char *following = memchr(next_content, '\n', (size_t) (parser->end - next_content));
+                            const char *following = next_newline(next_content, parser->end - next_content);
                            while (following && (following < parser->end)) {
                                following++;
@ -5552,7 +5611,7 @@ parser_lex(yp_parser_t *parser) {
                                // If there is a comment, then we need to find the end of the
                                // comment and continue searching from there.
-                                following = memchr(following, '\n', (size_t) (parser->end - following));
+                                following = next_newline(following, parser->end - following);
                            }
                            // If the lex state was ignored, or we hit a '.' or a '&.',
@ -5785,7 +5844,7 @@ parser_lex(yp_parser_t *parser) {
                // = => =~ == === =begin
                case '=':
-                    if (current_token_starts_line(parser) && strncmp(parser->current.end, "begin", 5) == 0 && yp_char_is_whitespace(parser->current.end[5])) {
+                    if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
                        yp_token_type_t type = lex_embdoc(parser);
                        if (type == YP_TOKEN_EOF) {
@ -5848,19 +5907,21 @@ parser_lex(yp_parser_t *parser) {
                            const char *ident_start = parser->current.end;
                            size_t width = 0;
-                            if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
+                            if (parser->current.end >= parser->end) {
                                parser->current.end = end;
                            } else if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
                                parser->current.end = end;
                            } else {
                                if (quote == YP_HEREDOC_QUOTE_NONE) {
                                    parser->current.end += width;
-                                    while ((width = char_is_identifier(parser, parser->current.end))) {
+                                    while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
                                        parser->current.end += width;
                                    }
                                } else {
                                    // If we have quotes, then we're going to go until we find the
                                    // end quote.
-                                    while (parser->current.end < parser->end && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
+                                    while ((parser->current.end < parser->end) && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
                                        parser->current.end++;
                                    }
                                }
@ -5882,7 +5943,7 @@ parser_lex(yp_parser_t *parser) {
                                });
                                if (parser->heredoc_end == NULL) {
-                                    const char *body_start = (const char *) memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+                                    const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
                                    if (body_start == NULL) {
                                        // If there is no newline after the heredoc identifier, then
@ -6465,13 +6526,13 @@ parser_lex(yp_parser_t *parser) {
            // Here we'll get a list of the places where strpbrk should break,
            // and then find the first one.
            const char *breakpoints = parser->lex_modes.current->as.list.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
            while (breakpoint != NULL) {
                switch (*breakpoint) {
                    case '\0':
                        // If we hit a null byte, skip directly past it.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    case '\\': {
                        // If we hit escapes, then we need to treat the next token
@ -6492,7 +6553,7 @@ parser_lex(yp_parser_t *parser) {
                            yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
                        }
-                        breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                        breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                        break;
                    }
                    case ' ':
@ -6517,7 +6578,7 @@ parser_lex(yp_parser_t *parser) {
                            // that looked like an interpolated class or instance variable
                            // like "#@" but wasn't actually. In this case we'll just skip
                            // to the next breakpoint.
-                            breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+                            breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                            break;
                        }
                    }
@ -6526,7 +6587,7 @@ parser_lex(yp_parser_t *parser) {
                        if (*breakpoint == parser->lex_modes.current->as.list.incrementor) {
                            // If we've hit the incrementor, then we need to skip past it and
                            // find the next breakpoint.
-                            breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                            breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                            parser->lex_modes.current->as.list.nesting++;
                            break;
                        }
@ -6537,7 +6598,7 @@ parser_lex(yp_parser_t *parser) {
                        // If this terminator doesn't actually close the list, then we need
                        // to continue on past it.
                        if (parser->lex_modes.current->as.list.nesting > 0) {
-                            breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                            breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                            parser->lex_modes.current->as.list.nesting--;
                            break;
                        }
@ -6577,13 +6638,13 @@ parser_lex(yp_parser_t *parser) {
            // regular expression. We'll use strpbrk to find the first of these
            // characters.
            const char *breakpoints = parser->lex_modes.current->as.regexp.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
            while (breakpoint != NULL) {
                switch (*breakpoint) {
                    case '\0':
                        // If we hit a null byte, skip directly past it.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    case '\\': {
                        // If we hit escapes, then we need to treat the next token
@ -6597,7 +6658,7 @@ parser_lex(yp_parser_t *parser) {
                            yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
                        }
-                        breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                        breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                        break;
                    }
                    case '#': {
@ -6613,7 +6674,7 @@ parser_lex(yp_parser_t *parser) {
                            // that looked like an interpolated class or instance variable
                            // like "#@" but wasn't actually. In this case we'll just skip
                            // to the next breakpoint.
-                            breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+                            breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                            break;
                        }
                    }
@ -6622,7 +6683,7 @@ parser_lex(yp_parser_t *parser) {
                        if (*breakpoint == parser->lex_modes.current->as.regexp.incrementor) {
                            // If we've hit the incrementor, then we need to skip past it and
                            // find the next breakpoint.
-                            breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                            breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                            parser->lex_modes.current->as.regexp.nesting++;
                            break;
                        }
@ -6635,7 +6696,7 @@ parser_lex(yp_parser_t *parser) {
                            if (parser->lex_modes.current->as.regexp.terminator != '\n') {
                                // If the terminator is not a newline, then we
                                // can set the next breakpoint and continue.
-                                breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                                breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                                break;
                            }
@ -6646,7 +6707,7 @@ parser_lex(yp_parser_t *parser) {
                        assert(*breakpoint == parser->lex_modes.current->as.regexp.terminator);
                        if (parser->lex_modes.current->as.regexp.nesting > 0) {
-                            breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                            breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                            parser->lex_modes.current->as.regexp.nesting--;
                            break;
                        }
@ -6694,7 +6755,7 @@ parser_lex(yp_parser_t *parser) {
            // These are the places where we need to split up the content of the
            // string. We'll use strpbrk to find the first of these characters.
            const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
            while (breakpoint != NULL) {
                // If we hit the incrementor, then we'll increment the nesting and
@ -6704,7 +6765,7 @@ parser_lex(yp_parser_t *parser) {
                    *breakpoint == parser->lex_modes.current->as.string.incrementor
                ) {
                    parser->lex_modes.current->as.string.nesting++;
-                    breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                    breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                    continue;
                }
@ -6715,7 +6776,7 @@ parser_lex(yp_parser_t *parser) {
                    // If this terminator doesn't actually close the string, then we need
                    // to continue on past it.
                    if (parser->lex_modes.current->as.string.nesting > 0) {
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        parser->lex_modes.current->as.string.nesting--;
                        continue;
                    }
@ -6762,7 +6823,7 @@ parser_lex(yp_parser_t *parser) {
                if (*breakpoint == '\n') {
                    if (parser->heredoc_end == NULL) {
                        yp_newline_list_append(&parser->newline_list, breakpoint);
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        continue;
                    } else {
                        parser->current.end = breakpoint + 1;
@ -6774,7 +6835,7 @@ parser_lex(yp_parser_t *parser) {
                switch (*breakpoint) {
                    case '\0':
                        // Skip directly past the null character.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    case '\\': {
                        // If we hit escapes, then we need to treat the next token
@ -6789,7 +6850,7 @@ parser_lex(yp_parser_t *parser) {
                            yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
                        }
-                        breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                        breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                        break;
                    }
                    case '#': {
@ -6802,7 +6863,7 @@ parser_lex(yp_parser_t *parser) {
                        // looked like an interpolated class or instance variable like "#@"
                        // but wasn't actually. In this case we'll just skip to the next
                        // breakpoint.
-                        breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+                        breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                        break;
                    }
                    default:
@ -6844,7 +6905,7 @@ parser_lex(yp_parser_t *parser) {
                    start += yp_strspn_inline_whitespace(start, parser->end - start);
                }
-                if (strncmp(start, ident_start, ident_length) == 0) {
+                if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
                    bool matched = true;
                    bool at_end = false;
@ -6888,13 +6949,13 @@ parser_lex(yp_parser_t *parser) {
                breakpoints[2] = '\0';
            }
-            const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
            while (breakpoint != NULL) {
                switch (*breakpoint) {
                    case '\0':
                        // Skip directly past the null character.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    case '\n': {
                        yp_newline_list_append(&parser->newline_list, breakpoint);
@ -6939,7 +7000,7 @@ parser_lex(yp_parser_t *parser) {
                        // Otherwise we hit a newline and it wasn't followed by a
                        // terminator, so we can continue parsing.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    }
                    case '\\': {
@ -6956,7 +7017,7 @@ parser_lex(yp_parser_t *parser) {
                                yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
                            }
-                            breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                            breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                        }
                        break;
                    }
@ -6970,7 +7031,7 @@ parser_lex(yp_parser_t *parser) {
                        // that looked like an interpolated class or instance variable
                        // like "#@" but wasn't actually. In this case we'll just skip
                        // to the next breakpoint.
-                        breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+                        breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                        break;
                    }
                    default:
@ -7007,7 +7068,7 @@ yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_tok
    ptrdiff_t length = content->end - content->start;
    assert(length >= 0);
-    yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
+    yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
 }
@ -7018,7 +7079,7 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
    ptrdiff_t length = content->end - content->start;
    assert(length >= 0);
-    yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
+    yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
 }
@ -7029,7 +7090,7 @@ yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
    ptrdiff_t length = content->end - content->start;
    assert(length >= 0);
-    yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
+    yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
 }
@ -7040,7 +7101,7 @@ yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openi
    ptrdiff_t length = content->end - content->start;
    assert(length >= 0);
-    yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
+    yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
    return node;
 }
@ -7505,10 +7566,10 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
                // the previous method name in, and append an =.
                size_t length = yp_string_length(&call->name);
-                char *name = malloc(length + 2);
+                char *name = calloc(length + 2, sizeof(char));
                if (name == NULL) return NULL;
-                sprintf(name, "%.*s=", (int) length, yp_string_source(&call->name));
+                yp_snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));
                // Now switch the name to the new string.
                yp_string_free(&call->name);
@ -8954,9 +9015,11 @@ parse_string_part(yp_parser_t *parser) {
 static yp_node_t *
 parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
    bool lex_string = lex_mode->mode == YP_LEX_STRING;
    bool lex_interpolation = lex_string && lex_mode->as.string.interpolation;
    yp_token_t opening = parser->previous;
-    if (lex_mode->mode != YP_LEX_STRING) {
+    if (!lex_string) {
        if (next_state != YP_LEX_STATE_NONE) {
            lex_state_set(parser, next_state);
        }
@ -8990,9 +9053,9 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
    }
    // If we weren't in a string in the previous check then we have to be now.
-    assert(lex_mode->mode == YP_LEX_STRING);
+    assert(lex_string);
-    if (lex_mode->as.string.interpolation) {
+    if (lex_interpolation) {
        yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
        while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
@ -9043,9 +9106,10 @@ parse_undef_argument(yp_parser_t *parser) {
            return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
        }
        case YP_TOKEN_SYMBOL_BEGIN: {
-            yp_lex_mode_t *lex_mode = parser->lex_modes.current;
+            yp_lex_mode_t lex_mode = *parser->lex_modes.current;
            parser_lex(parser);
-            return parse_symbol(parser, lex_mode, YP_LEX_STATE_NONE);
+
            return parse_symbol(parser, &lex_mode, YP_LEX_STATE_NONE);
        }
        default:
            yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a bare word or symbol argument.");
@ -9075,10 +9139,10 @@ parse_alias_argument(yp_parser_t *parser, bool first) {
            return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
        }
        case YP_TOKEN_SYMBOL_BEGIN: {
-            yp_lex_mode_t *lex_mode = parser->lex_modes.current;
+            yp_lex_mode_t lex_mode = *parser->lex_modes.current;
            parser_lex(parser);
-            return parse_symbol(parser, lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
+            return parse_symbol(parser, &lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
        }
        case YP_TOKEN_BACK_REFERENCE:
            parser_lex(parser);
@ -9177,7 +9241,7 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
                    common_whitespace = cur_whitespace;
                }
-                cur_char = memchr(cur_char + 1, '\n', (size_t) (parser->end - (cur_char + 1)));
+                cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
                if (cur_char) cur_char++;
            }
        }
@ -9252,7 +9316,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
            // At this point we have dedented all that we need to, so we need to find
            // the next newline.
-            const char *breakpoint = memchr(source_cursor, '\n', (size_t) (source_end - source_cursor));
+            const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);
            if (breakpoint == NULL) {
                // If there isn't another newline, then we can just move the rest of the
@ -9293,92 +9357,106 @@ parse_pattern_constant_path(yp_parser_t *parser, yp_node_t *node) {
    // If there is a [ or ( that follows, then this is part of a larger pattern
    // expression. We'll parse the inner pattern here, then modify the returned
    // inner pattern with our constant path attached.
-    if (match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
+    if (!match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
-        yp_token_t opening;
+        return node;
        yp_token_t closing;
        yp_node_t *inner = NULL;
        if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
            opening = parser->previous;
            accept(parser, YP_TOKEN_NEWLINE);
            if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
                inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
                accept(parser, YP_TOKEN_NEWLINE);
                expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
            }
            closing = parser->previous;
        } else {
            parser_lex(parser);
            opening = parser->previous;
            if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
                inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
                expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
            }
            closing = parser->previous;
        }
        if (inner) {
            // Now that we have the inner pattern, check to see if it's an array, find,
            // or hash pattern. If it is, then we'll attach our constant path to it. If
            // it's not, then we'll create an array pattern.
            switch (inner->type) {
                case YP_NODE_ARRAY_PATTERN_NODE: {
                    yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *)inner;
                    pattern_node->base.location.start = node->location.start;
                    pattern_node->base.location.end = closing.end;
                    pattern_node->constant = node;
                    pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                    pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
                    node = (yp_node_t *)pattern_node;
                    break;
                }
                case YP_NODE_FIND_PATTERN_NODE: {
                    yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
                    pattern_node->base.location.start = node->location.start;
                    pattern_node->base.location.end = closing.end;
                    pattern_node->constant = node;
                    pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                    pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
                    node = (yp_node_t *) pattern_node;
                    break;
                }
                case YP_NODE_HASH_PATTERN_NODE: {
                    yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *)inner;
                    pattern_node->base.location.start = node->location.start;
                    pattern_node->base.location.end = closing.end;
                    pattern_node->constant = node;
                    pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                    pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
                    node = (yp_node_t *) pattern_node;
                    break;
                }
                default: {
                    yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
                    yp_array_pattern_node_requireds_append(pattern_node, inner);
                    node = (yp_node_t *)pattern_node;
                    break;
                }
            }
        } else {
            // If there was no inner pattern, then we have something like Foo() or
            // Foo[]. In that case we'll create an array pattern with no requireds.
            node = (yp_node_t *)yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
        }
    }
-    return node;
+    yp_token_t opening;
    yp_token_t closing;
    yp_node_t *inner = NULL;
    if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
        opening = parser->previous;
        accept(parser, YP_TOKEN_NEWLINE);
        if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
            inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
            accept(parser, YP_TOKEN_NEWLINE);
            expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
        }
        closing = parser->previous;
    } else {
        parser_lex(parser);
        opening = parser->previous;
        if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
            inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
            expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
        }
        closing = parser->previous;
    }
    if (!inner) {
        // If there was no inner pattern, then we have something like Foo() or
        // Foo[]. In that case we'll create an array pattern with no requireds.
        return (yp_node_t *) yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
    }
    // Now that we have the inner pattern, check to see if it's an array, find,
    // or hash pattern. If it is, then we'll attach our constant path to it if
    // it doesn't already have a constant. If it's not one of those node types
    // or it does have a constant, then we'll create an array pattern.
    switch (inner->type) {
        case YP_NODE_ARRAY_PATTERN_NODE: {
            yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner;
            if (pattern_node->constant == NULL) {
                pattern_node->base.location.start = node->location.start;
                pattern_node->base.location.end = closing.end;
                pattern_node->constant = node;
                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
                return (yp_node_t *) pattern_node;
            }
            break;
        }
        case YP_NODE_FIND_PATTERN_NODE: {
            yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
            if (pattern_node->constant == NULL) {
                pattern_node->base.location.start = node->location.start;
                pattern_node->base.location.end = closing.end;
                pattern_node->constant = node;
                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
                return (yp_node_t *) pattern_node;
            }
            break;
        }
        case YP_NODE_HASH_PATTERN_NODE: {
            yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *) inner;
            if (pattern_node->constant == NULL) {
                pattern_node->base.location.start = node->location.start;
                pattern_node->base.location.end = closing.end;
                pattern_node->constant = node;
                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
                return (yp_node_t *) pattern_node;
            }
            break;
        }
        default:
            break;
    }
    // If we got here, then we didn't return one of the inner patterns by
    // attaching its constant. In this case we'll create an array pattern and
    // attach our constant to it.
    yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
    yp_array_pattern_node_requireds_append(pattern_node, inner);
    return (yp_node_t *) pattern_node;
 }
 // Parse a rest pattern.
@ -9897,8 +9975,6 @@ parse_pattern(yp_parser_t *parser, bool top_pattern, const char *message) {
 // Parse an expression that begins with the previous node that we just lexed.
 static inline yp_node_t *
 parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
    yp_lex_mode_t *lex_mode = parser->lex_modes.current;
    switch (parser->current.type) {
        case YP_TOKEN_BRACKET_LEFT_ARRAY: {
            parser_lex(parser);
@ -11015,7 +11091,10 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                    lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM);
                    parser_lex(parser);
                    name = parse_undef_argument(parser);
-                    if (name->type == YP_NODE_MISSING_NODE) break;
+                    if (name->type == YP_NODE_MISSING_NODE) {
                        yp_node_destroy(parser, name);
                        break;
                    }
                    yp_undef_node_append(undef, name);
                }
@ -11043,6 +11122,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                    receiver = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected expression after `not`.");
                    if (!parser->recovering) {
                        accept(parser, YP_TOKEN_NEWLINE);
                        expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after 'not' expression.");
                        arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
                    }
@ -11727,9 +11807,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
            return (yp_node_t *) node;
        }
        case YP_TOKEN_STRING_BEGIN: {
            assert(parser->lex_modes.current->mode == YP_LEX_STRING);
            bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
            yp_token_t opening = parser->current;
            parser_lex(parser);
            yp_token_t opening = parser->previous;
            yp_node_t *node;
            if (accept(parser, YP_TOKEN_STRING_END)) {
@ -11754,7 +11837,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                };
                return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
-            } else if (!lex_mode->as.string.interpolation) {
+            } else if (!lex_interpolation) {
                // If we don't accept interpolation then we expect the string to start
                // with a single string content node.
                expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
@ -11858,9 +11941,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                return node;
            }
        }
-        case YP_TOKEN_SYMBOL_BEGIN:
+        case YP_TOKEN_SYMBOL_BEGIN: {
            yp_lex_mode_t lex_mode = *parser->lex_modes.current;
            parser_lex(parser);
-            return parse_symbol(parser, lex_mode, YP_LEX_STATE_END);
+
            return parse_symbol(parser, &lex_mode, YP_LEX_STATE_END);
        }
        default:
            if (context_recoverable(parser, &parser->current)) {
                parser->recovering = true;
@ -12482,82 +12568,8 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
                    return path;
                }
-                case YP_TOKEN_AMPERSAND:
+                case YP_CASE_OPERATOR:
-                case YP_TOKEN_BACKTICK:
+                case YP_CASE_KEYWORD:
                case YP_TOKEN_BANG:
                case YP_TOKEN_BANG_EQUAL:
                case YP_TOKEN_BANG_TILDE:
                case YP_TOKEN_CARET:
                case YP_TOKEN_EQUAL_EQUAL:
                case YP_TOKEN_EQUAL_EQUAL_EQUAL:
                case YP_TOKEN_EQUAL_TILDE:
                case YP_TOKEN_GREATER:
                case YP_TOKEN_GREATER_EQUAL:
                case YP_TOKEN_GREATER_GREATER:
                case YP_TOKEN_HEREDOC_START:
                case YP_TOKEN_IGNORED_NEWLINE:
                case YP_TOKEN_KEYWORD_ALIAS:
                case YP_TOKEN_KEYWORD_AND:
                case YP_TOKEN_KEYWORD_BEGIN:
                case YP_TOKEN_KEYWORD_BEGIN_UPCASE:
                case YP_TOKEN_KEYWORD_BREAK:
                case YP_TOKEN_KEYWORD_CASE:
                case YP_TOKEN_KEYWORD_CLASS:
                case YP_TOKEN_KEYWORD_DEF:
                case YP_TOKEN_KEYWORD_DEFINED:
                case YP_TOKEN_KEYWORD_DO:
                case YP_TOKEN_KEYWORD_ELSE:
                case YP_TOKEN_KEYWORD_ELSIF:
                case YP_TOKEN_KEYWORD_END:
                case YP_TOKEN_KEYWORD_END_UPCASE:
                case YP_TOKEN_KEYWORD_ENSURE:
                case YP_TOKEN_KEYWORD_FALSE:
                case YP_TOKEN_KEYWORD_FOR:
                case YP_TOKEN_KEYWORD_IF:
                case YP_TOKEN_KEYWORD_IN:
                case YP_TOKEN_KEYWORD_NEXT:
                case YP_TOKEN_KEYWORD_NIL:
                case YP_TOKEN_KEYWORD_NOT:
                case YP_TOKEN_KEYWORD_OR:
                case YP_TOKEN_KEYWORD_REDO:
                case YP_TOKEN_KEYWORD_RESCUE:
                case YP_TOKEN_KEYWORD_RETRY:
                case YP_TOKEN_KEYWORD_RETURN:
                case YP_TOKEN_KEYWORD_SELF:
                case YP_TOKEN_KEYWORD_SUPER:
                case YP_TOKEN_KEYWORD_THEN:
                case YP_TOKEN_KEYWORD_TRUE:
                case YP_TOKEN_KEYWORD_UNDEF:
                case YP_TOKEN_KEYWORD_UNLESS:
                case YP_TOKEN_KEYWORD_UNTIL:
                case YP_TOKEN_KEYWORD_WHEN:
                case YP_TOKEN_KEYWORD_WHILE:
                case YP_TOKEN_KEYWORD_YIELD:
                case YP_TOKEN_KEYWORD___ENCODING__:
                case YP_TOKEN_KEYWORD___FILE__:
                case YP_TOKEN_KEYWORD___LINE__:
                case YP_TOKEN_LESS:
                case YP_TOKEN_LESS_EQUAL:
                case YP_TOKEN_LESS_EQUAL_GREATER:
                case YP_TOKEN_LESS_LESS:
                case YP_TOKEN_MINUS:
                case YP_TOKEN_PERCENT:
                case YP_TOKEN_PERCENT_LOWER_I:
                case YP_TOKEN_PERCENT_LOWER_W:
                case YP_TOKEN_PERCENT_LOWER_X:
                case YP_TOKEN_PERCENT_UPPER_I:
                case YP_TOKEN_PERCENT_UPPER_W:
                case YP_TOKEN_PIPE:
                case YP_TOKEN_PLUS:
                case YP_TOKEN_REGEXP_BEGIN:
                case YP_TOKEN_SLASH:
                case YP_TOKEN_STAR:
                case YP_TOKEN_STAR_STAR:
                case YP_TOKEN_TILDE:
                case YP_TOKEN_UCOLON_COLON:
                case YP_TOKEN_UDOT_DOT:
                case YP_TOKEN_UDOT_DOT_DOT:
                case YP_TOKEN___END__:
                case YP_TOKEN_IDENTIFIER: {
                    parser_lex(parser);
@ -12805,7 +12817,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
    } else if (size >= 2 && source[0] == '#' && source[1] == '!') {
        // If the first two bytes of the source are a shebang, then we'll indicate
        // that the encoding comment is at the end of the shebang.
-        const char *encoding_comment_start = memchr(source, '\n', size);
+        const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
        if (encoding_comment_start) {
            parser->encoding_comment_start = encoding_comment_start + 1;
        }
@ -12891,6 +12903,3 @@ yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
 #undef YP_CASE_KEYWORD
 #undef YP_CASE_OPERATOR
 #undef YP_CASE_WRITABLE
 #undef YP_STRINGIZE
 #undef YP_STRINGIZE0
 #undef YP_VERSION_MACRO
--- a/yarp/yarp.h
+++ b/yarp/yarp.h
@ -2,19 +2,6 @@
 #define YARP_H
 #include "yarp/defines.h"
 #include <assert.h>
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #ifndef _WIN32
 #include <strings.h>
 #endif
 #include "yarp/missing.h"
 #include "yarp/ast.h"
 #include "yarp/diagnostic.h"
 #include "yarp/node.h"
@ -24,17 +11,26 @@
 #include "yarp/unescape.h"
 #include "yarp/util/yp_buffer.h"
 #include "yarp/util/yp_char.h"
 #include "yarp/util/yp_memchr.h"
 #include "yarp/util/yp_strpbrk.h"
-#define YP_VERSION_MAJOR 0
+#include <assert.h>
-#define YP_VERSION_MINOR 4
+#include <stdarg.h>
-#define YP_VERSION_PATCH 0
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #ifndef _WIN32
 #include <strings.h>
 #endif
 void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);
 void yp_print_node(yp_parser_t *parser, yp_node_t *node);
-// Returns the YARP version and notably the serialization format
+// The YARP version and the serialization format.
 YP_EXPORTED_FUNCTION const char * yp_version(void);
 // Initialize a parser with the given start and end pointers.
@ -57,20 +53,6 @@ YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
 // Parse the Ruby source associated with the given parser and return the tree.
 YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);
 // Deallocate a node and all of its children.
 YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
 // This struct stores the information gathered by the yp_node_memsize function.
 // It contains both the memory footprint and additional metadata about the
 // shape of the tree.
 typedef struct {
    // The memory footprint gathered by yp_node_memsize.
    // NOTE(review): presumably measured in bytes -- confirm against the implementation.
    size_t memsize;
    // Metadata about the shape of the tree.
    // NOTE(review): presumably the number of nodes counted -- confirm against the implementation.
    size_t node_count;
 } yp_memsize_t;
 // Calculates the memory footprint of a given node.
 YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
 // Pretty-prints the AST represented by the given node to the given buffer.
 YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);