Manual YARP resync

Merged: https://github.com/ruby/ruby/pull/8007
2023-06-30 14:30:24 -04:00 · 2023-06-30 14:30:24 -04:00 · bfb933371d · 2023-07-05 20:59:16 +00:00
commit bfb933371d
parent 6f9d1b4b0f
77 changed files with 5222 additions and 5751 deletions
--- a/lib/yarp.rb
+++ b/lib/yarp.rb
@ -1,17 +1,79 @@
 # frozen_string_literal: true

 module YARP
-  # This represents a location in the source corresponding to a node or token.
-  class Location
-    attr_reader :start_offset, :length
+  # This represents a source of Ruby code that has been parsed. Locations hold a
+  # Source so that they can slice out their text and resolve byte offsets into
+  # line and column numbers.
+  class Source
+    attr_reader :source, :offsets # source text; ascending offsets (assumed line starts, first is 0)

-    def initialize(start_offset, length)
+    def initialize(source, offsets)
+      @source = source
+      @offsets = offsets
+    end
+
+    def slice(offset, length) # the `length` bytes of source starting at byte `offset`
+      source.byteslice(offset, length)
+    end
+
+    def line(value) # index of the first offsets entry > value (1-based line), else offsets.length
+      offsets.bsearch_index { |offset| offset > value } || offsets.length
+    end
+
+    def column(value) # bytes between value and the offsets entry just before line(value)
+      value - offsets[line(value) - 1]
+    end
+  end
+
+  # This represents a location in the source.
+  class Location
+    # A Source object that is used to determine more information from the given
+    # offset and length.
+    private attr_reader :source
+
+    # The byte offset from the beginning of the source where this location
+    # starts.
+    attr_reader :start_offset
+
+    # The length of this location in bytes.
+    attr_reader :length
+
+    def initialize(source, start_offset, length) # source: a Source; start_offset and length are in bytes
+      @source = source
      @start_offset = start_offset
      @length = length
    end

+    # The source code covered by this location: length bytes from start_offset.
+    def slice
+      source.slice(start_offset, length)
+    end
+
+    # The byte offset just past the last byte of this location (exclusive end).
    def end_offset
-      @start_offset + @length
+      start_offset + length
+    end
+
+    # The line number where this location starts, as resolved by Source#line.
+    def start_line
+      source.line(start_offset)
+    end
+
+    # The line number of the last byte in this location (end_offset is exclusive).
+    def end_line
+      source.line(end_offset - 1) # NOTE(review): this is start_offset - 1 when length is 0
+    end
+
+    # The column number in bytes where this location starts, measured from the
+    # start of its line as resolved by Source#column.
+    def start_column
+      source.column(start_offset)
+    end
+
+    # The column number in bytes, from the start of its line, of the last byte
+    # in this location (end_offset is exclusive, hence the - 1).
+    def end_column
+      source.column(end_offset - 1)
    end

    def deconstruct_keys(keys)
@ -101,21 +163,12 @@ module YARP

  # This represents a token from the Ruby source.
  class Token
-    attr_reader :type, :value, :start_offset, :length
+    attr_reader :type, :value, :location

-    def initialize(type, value, start_offset, length)
+    def initialize(type, value, location)
      @type = type
      @value = value
-      @start_offset = start_offset
-      @length = length
-    end
-
-    def end_offset
-      @start_offset + @length
-    end
-
-    def location
-      Location.new(@start_offset, @length)
+      @location = location
    end

    def deconstruct_keys(keys)
@ -143,20 +196,12 @@ module YARP

  # This represents a node in the tree.
  class Node
-    attr_reader :start_offset, :length
-
-    def end_offset
-      @start_offset + @length
-    end
-
-    def location
-      Location.new(@start_offset, @length)
-    end
+    attr_reader :location

    def pretty_print(q)
      q.group do
        q.text(self.class.name.split("::").last)
-        self.location.pretty_print(q)
+        location.pretty_print(q)
        q.text("(")
        q.nest(2) do
          deconstructed = deconstruct_keys([])
@ -171,67 +216,10 @@ module YARP
    end
  end

-  # A class that knows how to walk down the tree. None of the individual visit
-  # methods are implemented on this visitor, so it forces the consumer to
-  # implement each one that they need. For a default implementation that
-  # continues walking the tree, see the Visitor class.
-  class BasicVisitor
-    def visit(node)
-      node&.accept(self)
-    end
-
-    def visit_all(nodes)
-      nodes.map { |node| visit(node) }
-    end
-
-    def visit_child_nodes(node)
-      visit_all(node.child_nodes)
-    end
-  end
-
-  # This lexes with the Ripper lex. It drops any space events but otherwise
-  # returns the same tokens.
-  # [raises SyntaxError] if the syntax in source is invalid
-  def self.lex_ripper(source)
-    previous = []
-    results = []
-
-    Ripper.lex(source, raise_errors: true).each do |token|
-      case token[1]
-      when :on_sp
-        # skip
-      when :on_tstring_content
-        if previous[1] == :on_tstring_content &&
-            (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
-          previous[2] << token[2]
-        else
-          results << token
-          previous = token
-        end
-      when :on_words_sep
-        if previous[1] == :on_words_sep
-          previous[2] << token[2]
-        else
-          results << token
-          previous = token
-        end
-      else
-        results << token
-        previous = token
-      end
-    end
-
-    results
-  end
-
  # Load the serialized AST using the source as a reference into a tree.
  def self.load(source, serialized)
    Serialize.load(source, serialized)
  end
-
-  def self.parse(source, filepath=nil)
-    _parse(source, filepath)
-  end
 end

 require_relative "yarp/lex_compat"
@ -240,9 +228,3 @@ require_relative "yarp/ripper_compat"
 require_relative "yarp/serialize"
 require_relative "yarp/pack"
 require "yarp.so"
-
-module YARP
-  class << self
-    private :_parse
-  end
-end
--- a/lib/yarp/lex_compat.rb
+++ b/lib/yarp/lex_compat.rb
@ -534,12 +534,11 @@ module YARP
      end
    end

-    attr_reader :source, :offsets, :filepath
+    attr_reader :source, :filepath

    def initialize(source, filepath = "")
      @source = source
      @filepath = filepath || ""
-      @offsets = find_offsets(source)
    end

    def result
@ -561,7 +560,8 @@ module YARP
      result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom

      result_value.each_with_index do |(token, lex_state), index|
-        (lineno, column) = find_location(token.location.start_offset)
+        lineno = token.location.start_line
+        column = token.location.start_column
        column -= index == 0 ? 6 : 3 if bom && lineno == 1

        event = RIPPER.fetch(token.type)
@ -702,38 +702,6 @@ module YARP

      ParseResult.new(tokens, result.comments, result.errors, result.warnings)
    end
-
-    private
-
-    # YARP keeps locations around in the form of ranges of byte offsets from the
-    # start of the file. Ripper keeps locations around in the form of line and
-    # column numbers. To match the output, we keep a cache of the offsets at the
-    # beginning of each line.
-    def find_offsets(source)
-      last_offset = 0
-      offsets = [0]
-
-      source.each_line do |line|
-        last_offset += line.bytesize
-        offsets << last_offset
-      end
-
-      offsets
-    end
-
-    # Given a byte offset, find the line number and column number that it maps
-    # to. We use a binary search over the cached offsets to find the line number
-    # that the offset is on, and then subtract the offset of the previous line
-    # to find the column number.
-    def find_location(value)
-      line_number = offsets.bsearch_index { |offset| offset > value }
-      line_offset = offsets[line_number - 1] if line_number
-
-      [
-        line_number || offsets.length - 1,
-        value - (line_offset || offsets.last)
-      ]
-    end
  end

  # The constant that wraps the behavior of the lexer to match Ripper's output
@ -746,4 +714,39 @@ module YARP
  def self.lex_compat(source, filepath = "")
    LexCompat.new(source, filepath).result
  end
+
+  # This lexes with the Ripper lex. It drops any space events and merges some
+  # consecutive tstring_content and words_sep tokens, but otherwise returns the
+  # same tokens. Raises SyntaxError if the syntax in source is invalid.
+  def self.lex_ripper(source)
+    previous = [] # the last token appended to results; empty until the first one
+    results = []
+
+    Ripper.lex(source, raise_errors: true).each do |token|
+      case token[1] # dispatch on the token's event name
+      when :on_sp
+        # skip
+      when :on_tstring_content # content starting with "#$" or "#@" is folded into preceding content
+        if previous[1] == :on_tstring_content &&
+            (token[2].start_with?("\#$") || token[2].start_with?("\#@"))
+          previous[2] << token[2]
+        else
+          results << token
+          previous = token
+        end
+      when :on_words_sep # consecutive separators are collapsed into one token
+        if previous[1] == :on_words_sep
+          previous[2] << token[2]
+        else
+          results << token
+          previous = token
+        end
+      else
+        results << token
+        previous = token
+      end
+    end
+
+    results
+  end
 end
--- a/lib/yarp/node.rb
+++ b/lib/yarp/node.rb
--- a/lib/yarp/serialize.rb
+++ b/lib/yarp/serialize.rb
@ -9,26 +9,30 @@ require "stringio"

 module YARP
  module Serialize
-    def self.load(source, serialized)
+    def self.load(input, serialized) # input: the source text; serialized: its serialized AST
      io = StringIO.new(serialized)
      io.set_encoding(Encoding::BINARY)

-      Loader.new(source, serialized, io).load
+      Loader.new(input, serialized, io).load
    end

    class Loader
-      attr_reader :encoding, :source, :serialized, :io
-      attr_reader :constant_pool_offset, :constant_pool
+      attr_reader :encoding, :input, :serialized, :io
+      attr_reader :constant_pool_offset, :constant_pool, :source

-      def initialize(source, serialized, io)
+      def initialize(input, serialized, io)
        @encoding = Encoding::UTF_8

-        @source = source.dup
+        @input = input.dup # private copy; #load re-encodes and freezes it
        @serialized = serialized
        @io = io

        @constant_pool_offset = nil
        @constant_pool = nil
+
+        offsets = [0] # byte offset at which each line starts; the first line starts at 0
+        input.b.scan("\n") { offsets << $~.end(0) } # .b is a binary copy, so end(0) counts bytes
+        @source = Source.new(input, offsets) # NOTE(review): wraps the caller's string, not the @input copy
      end

      def load
@ -36,7 +40,7 @@ module YARP
        io.read(3).unpack("C3") => [0, 4, 0]

        @encoding = Encoding.find(io.read(load_varint))
-        @source = source.force_encoding(@encoding).freeze
+        @input = input.force_encoding(@encoding).freeze

        @constant_pool_offset = io.read(4).unpack1("L")
        @constant_pool = Array.new(load_varint, nil)
@ -78,7 +82,7 @@ module YARP
      end

      def load_location
-        Location.new(load_varint, load_varint)
+        Location.new(source, load_varint, load_varint) # varints read in order: start_offset, then length
      end

      def load_optional_location
@ -95,7 +99,7 @@ module YARP
          start = serialized.unpack1("L", offset: offset)
          length = serialized.unpack1("L", offset: offset + 4)

-          constant = source.byteslice(start, length).to_sym
+          constant = input.byteslice(start, length).to_sym
          constant_pool[index] = constant
        end

@ -104,262 +108,262 @@ module YARP

      def load_node
        type = io.getbyte
-        start_offset, length = load_varint, load_varint
+        location = load_location

        case type
        when 1 then
-          AliasNode.new(load_node, load_node, load_location, start_offset, length)
+          AliasNode.new(load_node, load_node, load_location, location)
        when 2 then
-          AlternationPatternNode.new(load_node, load_node, load_location, start_offset, length)
+          AlternationPatternNode.new(load_node, load_node, load_location, location)
        when 3 then
-          AndNode.new(load_node, load_node, load_location, start_offset, length)
+          AndNode.new(load_node, load_node, load_location, location)
        when 4 then
-          ArgumentsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
+          ArgumentsNode.new(Array.new(load_varint) { load_node }, location)
        when 5 then
-          ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
+          ArrayNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
        when 6 then
-          ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, start_offset, length)
+          ArrayPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_location, load_optional_location, location)
        when 7 then
-          AssocNode.new(load_node, load_optional_node, load_optional_location, start_offset, length)
+          AssocNode.new(load_node, load_optional_node, load_optional_location, location)
        when 8 then
-          AssocSplatNode.new(load_optional_node, load_location, start_offset, length)
+          AssocSplatNode.new(load_optional_node, load_location, location)
        when 9 then
-          BackReferenceReadNode.new(start_offset, length)
+          BackReferenceReadNode.new(location)
        when 10 then
-          BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
+          BeginNode.new(load_optional_location, load_optional_node, load_optional_node, load_optional_node, load_optional_node, load_optional_location, location)
        when 11 then
-          BlockArgumentNode.new(load_optional_node, load_location, start_offset, length)
+          BlockArgumentNode.new(load_optional_node, load_location, location)
        when 12 then
-          BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, start_offset, length)
+          BlockNode.new(Array.new(load_varint) { load_constant }, load_optional_node, load_optional_node, load_location, load_location, location)
        when 13 then
-          BlockParameterNode.new(load_optional_location, load_location, start_offset, length)
+          BlockParameterNode.new(load_optional_location, load_location, location)
        when 14 then
-          BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, start_offset, length)
+          BlockParametersNode.new(load_optional_node, Array.new(load_varint) { load_location }, load_optional_location, load_optional_location, location)
        when 15 then
-          BreakNode.new(load_optional_node, load_location, start_offset, length)
+          BreakNode.new(load_optional_node, load_location, location)
        when 16 then
-          CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, start_offset, length)
+          CallNode.new(load_optional_node, load_optional_location, load_optional_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, load_varint, load_string, location)
        when 17 then
-          CallOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
+          CallOperatorAndWriteNode.new(load_node, load_location, load_node, location)
        when 18 then
-          CallOperatorOrWriteNode.new(load_node, load_node, load_location, start_offset, length)
+          CallOperatorOrWriteNode.new(load_node, load_node, load_location, location)
        when 19 then
-          CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
+          CallOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
        when 20 then
-          CapturePatternNode.new(load_node, load_node, load_location, start_offset, length)
+          CapturePatternNode.new(load_node, load_node, load_location, location)
        when 21 then
-          CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, start_offset, length)
+          CaseNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_location, load_location, location)
        when 22 then
-          ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, start_offset, length)
+          ClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_location, load_optional_node, load_optional_node, load_location, location)
        when 23 then
-          ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          ClassVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
        when 24 then
-          ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          ClassVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
        when 25 then
-          ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          ClassVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 26 then
-          ClassVariableReadNode.new(start_offset, length)
+          ClassVariableReadNode.new(location)
        when 27 then
-          ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
+          ClassVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
        when 28 then
-          ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          ConstantOperatorAndWriteNode.new(load_location, load_location, load_node, location)
        when 29 then
-          ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          ConstantOperatorOrWriteNode.new(load_location, load_location, load_node, location)
        when 30 then
-          ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          ConstantOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 31 then
-          ConstantPathNode.new(load_optional_node, load_node, load_location, start_offset, length)
+          ConstantPathNode.new(load_optional_node, load_node, load_location, location)
        when 32 then
-          ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, start_offset, length)
+          ConstantPathOperatorAndWriteNode.new(load_node, load_location, load_node, location)
        when 33 then
-          ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, start_offset, length)
+          ConstantPathOperatorOrWriteNode.new(load_node, load_location, load_node, location)
        when 34 then
-          ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, start_offset, length)
+          ConstantPathOperatorWriteNode.new(load_node, load_location, load_node, load_constant, location)
        when 35 then
-          ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, start_offset, length)
+          ConstantPathWriteNode.new(load_node, load_optional_location, load_optional_node, location)
        when 36 then
-          ConstantReadNode.new(start_offset, length)
+          ConstantReadNode.new(location)
        when 37 then
          load_serialized_length
-          DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, start_offset, length)
+          DefNode.new(load_location, load_optional_node, load_optional_node, load_optional_node, Array.new(load_varint) { load_constant }, load_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, load_optional_location, location)
        when 38 then
-          DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, start_offset, length)
+          DefinedNode.new(load_optional_location, load_node, load_optional_location, load_location, location)
        when 39 then
-          ElseNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
+          ElseNode.new(load_location, load_optional_node, load_optional_location, location)
        when 40 then
-          EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, start_offset, length)
+          EmbeddedStatementsNode.new(load_location, load_optional_node, load_location, location)
        when 41 then
-          EmbeddedVariableNode.new(load_location, load_node, start_offset, length)
+          EmbeddedVariableNode.new(load_location, load_node, location)
        when 42 then
-          EnsureNode.new(load_location, load_optional_node, load_location, start_offset, length)
+          EnsureNode.new(load_location, load_optional_node, load_location, location)
        when 43 then
-          FalseNode.new(start_offset, length)
+          FalseNode.new(location)
        when 44 then
-          FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, start_offset, length)
+          FindPatternNode.new(load_optional_node, load_node, Array.new(load_varint) { load_node }, load_node, load_optional_location, load_optional_location, location)
        when 45 then
-          FloatNode.new(start_offset, length)
+          FloatNode.new(location)
        when 46 then
-          ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, start_offset, length)
+          ForNode.new(load_node, load_node, load_optional_node, load_location, load_location, load_optional_location, load_location, location)
        when 47 then
-          ForwardingArgumentsNode.new(start_offset, length)
+          ForwardingArgumentsNode.new(location)
        when 48 then
-          ForwardingParameterNode.new(start_offset, length)
+          ForwardingParameterNode.new(location)
        when 49 then
-          ForwardingSuperNode.new(load_optional_node, start_offset, length)
+          ForwardingSuperNode.new(load_optional_node, location)
        when 50 then
-          GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          GlobalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
        when 51 then
-          GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          GlobalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
        when 52 then
-          GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          GlobalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 53 then
-          GlobalVariableReadNode.new(start_offset, length)
+          GlobalVariableReadNode.new(location)
        when 54 then
-          GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, start_offset, length)
+          GlobalVariableWriteNode.new(load_location, load_optional_location, load_optional_node, location)
        when 55 then
-          HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
+          HashNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
        when 56 then
-          HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
+          HashPatternNode.new(load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_location, load_optional_location, location)
        when 57 then
-          IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
+          IfNode.new(load_optional_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
        when 58 then
-          ImaginaryNode.new(load_node, start_offset, length)
+          ImaginaryNode.new(load_node, location)
        when 59 then
-          InNode.new(load_node, load_optional_node, load_location, load_optional_location, start_offset, length)
+          InNode.new(load_node, load_optional_node, load_location, load_optional_location, location)
        when 60 then
-          InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          InstanceVariableOperatorAndWriteNode.new(load_location, load_location, load_node, location)
        when 61 then
-          InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, start_offset, length)
+          InstanceVariableOperatorOrWriteNode.new(load_location, load_location, load_node, location)
        when 62 then
-          InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          InstanceVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 63 then
-          InstanceVariableReadNode.new(start_offset, length)
+          InstanceVariableReadNode.new(location)
        when 64 then
-          InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, start_offset, length)
+          InstanceVariableWriteNode.new(load_location, load_optional_node, load_optional_location, location)
        when 65 then
-          IntegerNode.new(start_offset, length)
+          IntegerNode.new(location)
        when 66 then
-          InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, start_offset, length)
+          InterpolatedRegularExpressionNode.new(load_location, Array.new(load_varint) { load_node }, load_location, load_varint, location)
        when 67 then
-          InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
+          InterpolatedStringNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
        when 68 then
-          InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, start_offset, length)
+          InterpolatedSymbolNode.new(load_optional_location, Array.new(load_varint) { load_node }, load_optional_location, location)
        when 69 then
-          InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, start_offset, length)
+          InterpolatedXStringNode.new(load_location, Array.new(load_varint) { load_node }, load_location, location)
        when 70 then
-          KeywordHashNode.new(Array.new(load_varint) { load_node }, start_offset, length)
+          KeywordHashNode.new(Array.new(load_varint) { load_node }, location)
        when 71 then
-          KeywordParameterNode.new(load_location, load_optional_node, start_offset, length)
+          KeywordParameterNode.new(load_location, load_optional_node, location)
        when 72 then
-          KeywordRestParameterNode.new(load_location, load_optional_location, start_offset, length)
+          KeywordRestParameterNode.new(load_location, load_optional_location, location)
        when 73 then
-          LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, start_offset, length)
+          LambdaNode.new(Array.new(load_varint) { load_constant }, load_location, load_optional_node, load_optional_node, location)
        when 74 then
-          LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          LocalVariableOperatorAndWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 75 then
-          LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, start_offset, length)
+          LocalVariableOperatorOrWriteNode.new(load_location, load_location, load_node, load_constant, location)
        when 76 then
-          LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, start_offset, length)
+          LocalVariableOperatorWriteNode.new(load_location, load_location, load_node, load_constant, load_constant, location)
        when 77 then
-          LocalVariableReadNode.new(load_constant, load_varint, start_offset, length)
+          LocalVariableReadNode.new(load_constant, load_varint, location)
        when 78 then
-          LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, start_offset, length)
+          LocalVariableWriteNode.new(load_constant, load_varint, load_optional_node, load_location, load_optional_location, location)
        when 79 then
-          MatchPredicateNode.new(load_node, load_node, load_location, start_offset, length)
+          MatchPredicateNode.new(load_node, load_node, load_location, location)
        when 80 then
-          MatchRequiredNode.new(load_node, load_node, load_location, start_offset, length)
+          MatchRequiredNode.new(load_node, load_node, load_location, location)
        when 81 then
-          MissingNode.new(start_offset, length)
+          MissingNode.new(location)
        when 82 then
-          ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, start_offset, length)
+          ModuleNode.new(Array.new(load_varint) { load_constant }, load_location, load_node, load_optional_node, load_location, location)
        when 83 then
-          MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, start_offset, length)
+          MultiWriteNode.new(Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_location, load_optional_location, location)
        when 84 then
-          NextNode.new(load_optional_node, load_location, start_offset, length)
+          NextNode.new(load_optional_node, load_location, location)
        when 85 then
-          NilNode.new(start_offset, length)
+          NilNode.new(location)
        when 86 then
-          NoKeywordsParameterNode.new(load_location, load_location, start_offset, length)
+          NoKeywordsParameterNode.new(load_location, load_location, location)
        when 87 then
-          NumberedReferenceReadNode.new(start_offset, length)
+          NumberedReferenceReadNode.new(location)
        when 88 then
-          OptionalParameterNode.new(load_constant, load_location, load_location, load_node, start_offset, length)
+          OptionalParameterNode.new(load_constant, load_location, load_location, load_node, location)
        when 89 then
-          OrNode.new(load_node, load_node, load_location, start_offset, length)
+          OrNode.new(load_node, load_node, load_location, location)
        when 90 then
-          ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, start_offset, length)
+          ParametersNode.new(Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, Array.new(load_varint) { load_node }, load_optional_node, Array.new(load_varint) { load_node }, load_optional_node, load_optional_node, location)
        when 91 then
-          ParenthesesNode.new(load_optional_node, load_location, load_location, start_offset, length)
+          ParenthesesNode.new(load_optional_node, load_location, load_location, location)
        when 92 then
-          PinnedExpressionNode.new(load_node, load_location, load_location, load_location, start_offset, length)
+          PinnedExpressionNode.new(load_node, load_location, load_location, load_location, location)
        when 93 then
-          PinnedVariableNode.new(load_node, load_location, start_offset, length)
+          PinnedVariableNode.new(load_node, load_location, location)
        when 94 then
-          PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
+          PostExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
        when 95 then
-          PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, start_offset, length)
+          PreExecutionNode.new(load_optional_node, load_location, load_location, load_location, location)
        when 96 then
-          ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, start_offset, length)
+          ProgramNode.new(Array.new(load_varint) { load_constant }, load_node, location)
        when 97 then
-          RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, start_offset, length)
+          RangeNode.new(load_optional_node, load_optional_node, load_location, load_varint, location)
        when 98 then
-          RationalNode.new(load_node, start_offset, length)
+          RationalNode.new(load_node, location)
        when 99 then
-          RedoNode.new(start_offset, length)
+          RedoNode.new(location)
        when 100 then
-          RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, start_offset, length)
+          RegularExpressionNode.new(load_location, load_location, load_location, load_string, load_varint, location)
        when 101 then
-          RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, start_offset, length)
+          RequiredDestructuredParameterNode.new(Array.new(load_varint) { load_node }, load_location, load_location, location)
        when 102 then
-          RequiredParameterNode.new(load_constant, start_offset, length)
+          RequiredParameterNode.new(load_constant, location)
        when 103 then
-          RescueModifierNode.new(load_node, load_location, load_node, start_offset, length)
+          RescueModifierNode.new(load_node, load_location, load_node, location)
        when 104 then
-          RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, start_offset, length)
+          RescueNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_location, load_optional_node, load_optional_node, load_optional_node, location)
        when 105 then
-          RestParameterNode.new(load_location, load_optional_location, start_offset, length)
+          RestParameterNode.new(load_location, load_optional_location, location)
        when 106 then
-          RetryNode.new(start_offset, length)
+          RetryNode.new(location)
        when 107 then
-          ReturnNode.new(load_location, load_optional_node, start_offset, length)
+          ReturnNode.new(load_location, load_optional_node, location)
        when 108 then
-          SelfNode.new(start_offset, length)
+          SelfNode.new(location)
        when 109 then
-          SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, start_offset, length)
+          SingletonClassNode.new(Array.new(load_varint) { load_constant }, load_location, load_location, load_node, load_optional_node, load_location, location)
        when 110 then
-          SourceEncodingNode.new(start_offset, length)
+          SourceEncodingNode.new(location)
        when 111 then
-          SourceFileNode.new(load_string, start_offset, length)
+          SourceFileNode.new(load_string, location)
        when 112 then
-          SourceLineNode.new(start_offset, length)
+          SourceLineNode.new(location)
        when 113 then
-          SplatNode.new(load_location, load_optional_node, start_offset, length)
+          SplatNode.new(load_location, load_optional_node, location)
        when 114 then
-          StatementsNode.new(Array.new(load_varint) { load_node }, start_offset, length)
+          StatementsNode.new(Array.new(load_varint) { load_node }, location)
        when 115 then
-          StringConcatNode.new(load_node, load_node, start_offset, length)
+          StringConcatNode.new(load_node, load_node, location)
        when 116 then
-          StringNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
+          StringNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
        when 117 then
-          SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, start_offset, length)
+          SuperNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, load_optional_node, location)
        when 118 then
-          SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, start_offset, length)
+          SymbolNode.new(load_optional_location, load_location, load_optional_location, load_string, location)
        when 119 then
-          TrueNode.new(start_offset, length)
+          TrueNode.new(location)
        when 120 then
-          UndefNode.new(Array.new(load_varint) { load_node }, load_location, start_offset, length)
+          UndefNode.new(Array.new(load_varint) { load_node }, load_location, location)
        when 121 then
-          UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, start_offset, length)
+          UnlessNode.new(load_location, load_node, load_optional_node, load_optional_node, load_optional_location, location)
        when 122 then
-          UntilNode.new(load_location, load_node, load_optional_node, start_offset, length)
+          UntilNode.new(load_location, load_node, load_optional_node, location)
        when 123 then
-          WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, start_offset, length)
+          WhenNode.new(load_location, Array.new(load_varint) { load_node }, load_optional_node, location)
        when 124 then
-          WhileNode.new(load_location, load_node, load_optional_node, start_offset, length)
+          WhileNode.new(load_location, load_node, load_optional_node, location)
        when 125 then
-          XStringNode.new(load_location, load_location, load_location, load_string, start_offset, length)
+          XStringNode.new(load_location, load_location, load_location, load_string, location)
        when 126 then
-          YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, start_offset, length)
+          YieldNode.new(load_location, load_optional_location, load_optional_node, load_optional_location, location)
        end
      end
    end
--- a/test/yarp/compile_test.rb
+++ b/test/yarp/compile_test.rb
@ -1,212 +0,0 @@
-# frozen_string_literal: true
-
-require "yarp_test_helper"
-
-class CompileTest < Test::Unit::TestCase
-  def test_AliasNode
-    assert_compiles("alias foo bar")
-  end
-
-  def test_AndNode
-    assert_compiles("true && false")
-  end
-
-  def test_ArrayNode
-    assert_compiles("[]")
-    assert_compiles("[foo, bar, baz]")
-  end
-
-  def test_AssocNode
-    assert_compiles("{ foo: bar }")
-  end
-
-  def test_BlockNode
-    assert_compiles("foo { bar }")
-  end
-
-  def test_BlockNode_with_optionals
-    assert_compiles("foo { |x = 1| bar }")
-  end
-
-  def test_CallNode
-    assert_compiles("foo")
-    assert_compiles("foo(bar)")
-  end
-
-  def test_ClassVariableReadNode
-    assert_compiles("@@foo")
-  end
-
-  def test_ClassVariableWriteNode
-    assert_compiles("@@foo = 1")
-  end
-
-  def test_FalseNode
-    assert_compiles("false")
-  end
-
-  def test_GlobalVariableReadNode
-    assert_compiles("$foo")
-  end
-
-  def test_GlobalVariableWriteNode
-    assert_compiles("$foo = 1")
-  end
-
-  def test_HashNode
-    assert_compiles("{ foo: bar }")
-  end
-
-  def test_InstanceVariableReadNode
-    assert_compiles("@foo")
-  end
-
-  def test_InstanceVariableWriteNode
-    assert_compiles("@foo = 1")
-  end
-
-  def test_IntegerNode
-    assert_compiles("1")
-    assert_compiles("1_000")
-  end
-
-  def test_InterpolatedStringNode
-    assert_compiles("\"foo \#{bar} baz\"")
-  end
-
-  def test_LocalVariableWriteNode
-    assert_compiles("foo = 1")
-  end
-
-  def test_LocalVariableReadNode
-    assert_compiles("[foo = 1, foo]")
-  end
-
-  def test_NilNode
-    assert_compiles("nil")
-  end
-
-  def test_OrNode
-    assert_compiles("true || false")
-  end
-
-  def test_ParenthesesNode
-    assert_compiles("()")
-  end
-
-  def test_ProgramNode
-    assert_compiles("")
-  end
-
-  def test_RangeNode
-    assert_compiles("foo..bar")
-    assert_compiles("foo...bar")
-    assert_compiles("(foo..)")
-    assert_compiles("(foo...)")
-    assert_compiles("(..bar)")
-    assert_compiles("(...bar)")
-  end
-
-  def test_SelfNode
-    assert_compiles("self")
-  end
-
-  def test_StringNode
-    assert_compiles("\"foo\"")
-  end
-
-  def test_SymbolNode
-    assert_compiles(":foo")
-  end
-
-  def test_TrueNode
-    assert_compiles("true")
-  end
-
-  def test_UndefNode
-    assert_compiles("undef :foo, :bar, :baz")
-  end
-
-  def test_XStringNode
-    assert_compiles("`foo`")
-  end
-
-  private
-
-  def assert_compiles(source)
-    assert_equal_iseqs(rubyvm_compile(source), YARP.compile(source))
-  end
-
-  # Instruction sequences have 13 elements in their lists. We don't currently
-  # support all of the fields, so we can't compare the iseqs directly. Instead,
-  # we compare the elements that we do support.
-  def assert_equal_iseqs(expected, actual)
-    # The first element is the magic comment string.
-    assert_equal expected[0], actual[0]
-
-    # The next three elements are the major, minor, and patch version numbers.
-    # TODO: Insert this check once Ruby 3.3 is released, and the TruffleRuby
-    # GitHub workflow also checks against Ruby 3.3
-    # assert_equal expected[1...4], actual[1...4]
-
-    # The next element is a set of options for the iseq. It has lots of
-    # different information, some of which we support and some of which we
-    # don't.
-    assert_equal expected[4][:arg_size], actual[4][:arg_size], "Unexpected difference in arg_size"
-    assert_equal expected[4][:stack_max], actual[4][:stack_max], "Unexpected difference in stack_max"
-
-    assert_kind_of Integer, actual[4][:local_size]
-    assert_kind_of Integer, actual[4][:node_id]
-
-    assert_equal expected[4][:code_location].length, actual[4][:code_location].length, "Unexpected difference in code_location length"
-    assert_equal expected[4][:node_ids].length, actual[4][:node_ids].length, "Unexpected difference in node_ids length"
-
-    # Then we have the name of the iseq, the relative file path, the absolute
-    # file path, and the line number. We don't have this working quite yet.
-    assert_kind_of String, actual[5]
-    assert_kind_of String, actual[6]
-    assert_kind_of String, actual[7]
-    assert_kind_of Integer, actual[8]
-
-    # Next we have the type of the iseq.
-    assert_equal expected[9], actual[9]
-
-    # Next we have the list of local variables. We don't support this yet.
-    assert_kind_of Array, actual[10]
-
-    # Next we have the argument options. These are used in block and method
-    # iseqs to reflect how the arguments are passed.
-    assert_equal expected[11], actual[11], "Unexpected difference in argument options"
-
-    # Next we have the catch table entries. We don't have this working yet.
-    assert_kind_of Array, actual[12]
-
-    # Finally we have the actual instructions. We support some of this, but omit
-    # line numbers and some tracepoint events.
-    expected[13].each do |insn|
-      case insn
-      in [:send, opnds, expected_block] unless expected_block.nil?
-        actual[13].shift => [:send, ^(opnds), actual_block]
-        assert_equal_iseqs expected_block, actual_block
-      in Array | :RUBY_EVENT_B_CALL | :RUBY_EVENT_B_RETURN | /^label_\d+/
-        assert_equal insn, actual[13].shift
-      in Integer | /^RUBY_EVENT_/
-        # skip these for now
-      else
-        flunk "Unexpected instruction: #{insn.inspect}"
-      end
-    end
-  end
-
-  def rubyvm_compile(source)
-    options = {
-      peephole_optimization: false,
-      specialized_instruction: false,
-      operands_unification: false,
-      instructions_unification: false,
-      frozen_string_literal: false
-    }
-
-    RubyVM::InstructionSequence.compile(source, **options).to_a
-  end
-end
--- a/test/yarp/encoding_test.rb
+++ b/test/yarp/encoding_test.rb
@ -55,6 +55,21 @@ class EncodingTest < Test::Unit::TestCase
    assert_equal Encoding.find("utf-8"), actual
  end

+  # This test may be a little confusing. Basically when we use our strpbrk, it
+  # takes into account the encoding of the file.
+  def test_strpbrk_multibyte
+    result = YARP.parse(<<~RUBY)
+      # encoding: Shift_JIS
+      %w[\x81\x5c]
+    RUBY
+
+    assert(result.errors.empty?)
+    assert_equal(
+      (+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
+      result.value.statements.body.first.elements.first.unescaped
+    )
+  end
+
  def test_utf_8_variations
    %w[
      utf-8-unix
--- a/test/yarp/fixtures/not.txt
+++ b/test/yarp/fixtures/not.txt
@ -18,3 +18,16 @@ not foo and


  bar
+
+not(foo
+
+
+)
+
+not(
+
+
+foo
+
+
+  )
--- a/test/yarp/fixtures/patterns.txt
+++ b/test/yarp/fixtures/patterns.txt
@ -76,6 +76,7 @@ foo => Foo(*bar, baz, *qux)
 foo => Foo[]
 foo => Foo[1]
 foo => Foo[1, 2, 3]
+foo => Foo[Foo[]]
 foo => Foo[bar]
 foo => Foo[*bar, baz]
 foo => Foo[bar, *baz]
--- a/test/yarp/language_server_test.rb
+++ b/test/yarp/language_server_test.rb
@ -1,363 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "yarp_test_helper"
-require "yarp/language_server"
-
-module YARP
-  class LanguageServerTest < Test::Unit::TestCase
-    module Request
-      # Represents a hash pattern.
-      class Shape
-        attr_reader :values
-
-        def initialize(values)
-          @values = values
-        end
-
-        def ===(other)
-          values.all? do |key, value|
-            value == :any ? other.key?(key) : value === other[key]
-          end
-        end
-      end
-
-      # Represents an array pattern.
-      class Tuple
-        attr_reader :values
-
-        def initialize(values)
-          @values = values
-        end
-
-        def ===(other)
-          values.each_with_index.all? { |value, index| value === other[index] }
-        end
-      end
-
-      def self.[](value)
-        case value
-        when Array
-          Tuple.new(value.map { |child| self[child] })
-        when Hash
-          Shape.new(value.transform_values { |child| self[child] })
-        else
-          value
-        end
-      end
-    end
-
-    class Initialize < Struct.new(:id)
-      def to_hash
-        { method: "initialize", id: id }
-      end
-    end
-
-    class Shutdown < Struct.new(:id)
-      def to_hash
-        { method: "shutdown", id: id }
-      end
-    end
-
-    class TextDocumentDidOpen < Struct.new(:uri, :text)
-      def to_hash
-        {
-          method: "textDocument/didOpen",
-          params: { textDocument: { uri: uri, text: text } }
-        }
-      end
-    end
-
-    class TextDocumentDidChange < Struct.new(:uri, :text)
-      def to_hash
-        {
-          method: "textDocument/didChange",
-          params: {
-            textDocument: { uri: uri },
-            contentChanges: [{ text: text }]
-          }
-        }
-      end
-    end
-
-    class TextDocumentDidClose < Struct.new(:uri)
-      def to_hash
-        {
-          method: "textDocument/didClose",
-          params: { textDocument: { uri: uri } }
-        }
-      end
-    end
-
-    class TextDocumentCodeAction < Struct.new(:id, :uri, :diagnostics)
-      def to_hash
-        {
-          method: "textDocument/codeAction",
-          id: id,
-          params: {
-            textDocument: { uri: uri },
-            context: {
-              diagnostics: diagnostics,
-            },
-          },
-        }
-      end
-    end
-
-    class TextDocumentDiagnostic < Struct.new(:id, :uri)
-      def to_hash
-        {
-          method: "textDocument/diagnostic",
-          id: id,
-          params: {
-            textDocument: { uri: uri },
-          }
-        }
-      end
-    end
-
-    def test_reading_file
-      Tempfile.create(%w[test- .rb]) do |file|
-        file.write("class Foo; end")
-        file.rewind
-
-        responses = run_server([
-          Initialize.new(1),
-          Shutdown.new(3)
-        ])
-
-        shape = Request[[
-          { id: 1, result: { capabilities: Hash } },
-          { id: 3, result: {} }
-        ]]
-
-        assert_operator(shape, :===, responses)
-      end
-    end
-
-    def test_clean_shutdown
-      responses = run_server([Initialize.new(1), Shutdown.new(2)])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 2, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-    end
-
-    def test_file_that_does_not_exist
-      responses = run_server([
-        Initialize.new(1),
-        Shutdown.new(3)
-      ])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 3, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-    end
-
-    def test_code_action_request
-      message = "this is an error"
-      diagnostic = {
-        range: { start: { line: 0, character: 0 }, end: { line: 0, character: 0 } },
-        message: message,
-        severity: 1,
-      }
-      responses = run_server([
-        Initialize.new(1),
-        TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
-          1 + (
-        RUBY
-        TextDocumentCodeAction.new(2, "file:///path/to/file.rb", [diagnostic]),
-        Shutdown.new(3)
-      ])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 2, result: [
-            {
-              title: "Report incorrect error: `#{message}`",
-              kind: "quickfix",
-              diagnostics: [diagnostic],
-              command: {
-                title: "Report incorrect error",
-                command: "vscode.open",
-                arguments: [String]
-              }
-            }
-          ],
-        },
-        { id: 3, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-      assert(responses.dig(1, :result, 0, :command, :arguments, 0).include?(URI.encode_www_form_component(message)))
-    end
-
-    def test_code_action_request_no_diagnostic
-      responses = run_server([
-        Initialize.new(1),
-        TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
-          1 + (
-        RUBY
-        TextDocumentCodeAction.new(2, "file:///path/to/file.rb", []),
-        Shutdown.new(3)
-      ])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 2, result: [] },
-        { id: 3, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-    end
-
-    def test_code_action_request_no_content
-      message = "this is an error"
-      diagnostic = {
-        range: { start: { line: 0, character: 0 }, end: { line: 0, character: 0 } },
-        message: message,
-        severity: 1,
-      }
-      responses = run_server([
-        Initialize.new(1),
-        TextDocumentCodeAction.new(2, "file:///path/to/file.rb", [diagnostic]),
-        Shutdown.new(3)
-      ])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 2, result: nil },
-        { id: 3, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-    end
-
-    def test_diagnostics_request_error
-      responses = run_server([
-        Initialize.new(1),
-        TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
-          1 + (
-        RUBY
-        TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
-        Shutdown.new(3)
-      ])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 2, result: { kind: "full", items: [
-          {
-            range: {
-              start: { line: Integer, character: Integer },
-              end: { line: Integer, character: Integer }
-            },
-            message: String,
-            severity: Integer
-          },
-        ] } },
-        { id: 3, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-      assert(responses.dig(1, :result, :items).count { |item| item[:severity] == 1 } > 0)
-    end
-
-    def test_diagnostics_request_warning
-      responses = run_server([
-        Initialize.new(1),
-        TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
-          a/b /c
-        RUBY
-        TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
-        Shutdown.new(3)
-      ])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 2, result: { kind: "full", items: [
-          {
-            range: {
-              start: { line: Integer, character: Integer },
-              end: { line: Integer, character: Integer }
-            },
-            message: String,
-            severity: Integer
-          },
-        ] } },
-        { id: 3, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-      assert(responses.dig(1, :result, :items).count { |item| item[:severity] == 2 } > 0)
-    end
-
-    def test_diagnostics_request_nothing
-      responses = run_server([
-        Initialize.new(1),
-        TextDocumentDidOpen.new("file:///path/to/file.rb", <<~RUBY),
-          a = 1
-        RUBY
-        TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
-        Shutdown.new(3)
-      ])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 2, result: { kind: "full", items: [] } },
-        { id: 3, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-      assert_equal(0, responses.dig(1, :result, :items).size)
-    end
-
-    def test_diagnostics_request_no_content
-      responses = run_server([
-        Initialize.new(1),
-        TextDocumentDiagnostic.new(2, "file:///path/to/file.rb"),
-        Shutdown.new(3)
-      ])
-
-      shape = Request[[
-        { id: 1, result: { capabilities: Hash } },
-        { id: 2, result: nil },
-        { id: 3, result: {} }
-      ]]
-
-      assert_operator(shape, :===, responses)
-    end
-
-    private
-
-    def write(content)
-      request = content.to_hash.merge(jsonrpc: "2.0").to_json
-      "Content-Length: #{request.bytesize}\r\n\r\n#{request}"
-    end
-
-    def read(content)
-      [].tap do |messages|
-        while (headers = content.gets("\r\n\r\n"))
-          source = content.read(headers[/Content-Length: (\d+)/i, 1].to_i)
-          messages << JSON.parse(source, symbolize_names: true)
-        end
-      end
-    end
-
-    def run_server(messages)
-      input = StringIO.new(messages.map { |message| write(message) }.join)
-      output = StringIO.new
-
-      LanguageServer.new(
-        input: input,
-        output: output,
-      ).run
-
-      read(output.tap(&:rewind))
-    end
-  end
-end
--- a/test/yarp/parse_test.rb
+++ b/test/yarp/parse_test.rb
@ -3,10 +3,12 @@
 require "yarp_test_helper"

 class ParseTest < Test::Unit::TestCase
-  # Because we're reading the snapshots from disk, we need to make sure that
-  # they're encoded as UTF-8. When certain settings are present this might not
-  # always be the case (e.g., LANG=C or -Eascii-8bit). So here we force the
-  # default external encoding for the duration of the test.
+  # When we pretty-print the trees to compare against the snapshots, we want to
+  # be certain that we print with the same external encoding. This is because
+  # methods like Symbol#inspect take into account external encoding and it could
+  # change how the snapshot is generated. On machines with certain settings
+  # (like LANG=C or -Eascii-8bit) this could have been changed. So here we're
+  # going to force it to be UTF-8 to keep the snapshots consistent.
  def setup
    @previous_default_external = Encoding.default_external
    ignore_warnings { Encoding.default_external = Encoding::UTF_8 }
@ -29,20 +31,6 @@ class ParseTest < Test::Unit::TestCase
    seattlerb/pct_w_heredoc_interp_nested.txt
  ]

-  # Because the filepath in SourceFileNodes is different from one maching to the
-  # next, PP.pp(sexp, +"", 79) can have different results: both the path itself
-  # and the line breaks based on the length of the path.
-  def normalize_printed(printed)
-    printed
-      .gsub(
-        /SourceFileNode \s*
-          \(\s* (\d+\.\.\.\d+) \s*\) \s*
-          \(\s* ("[^"]*")      \s*\)
-        /mx,
-        'SourceFileNode(\1)(\2)')
-      .gsub(__dir__, "")
-  end
-
  def find_source_file_node(node)
    if node.is_a?(YARP::SourceFileNode)
      node
@ -79,27 +67,26 @@ class ParseTest < Test::Unit::TestCase
      # that is invalid Ruby.
      refute_nil Ripper.sexp_raw(source)

-      # Next, parse the source and print the value.
-      result = YARP.parse_file(filepath)
-      value = result.value
-      printed = normalize_printed(PP.pp(value, +"", 79))
-
      # Next, assert that there were no errors during parsing.
-      assert_empty result.errors, value
+      result = YARP.parse(source, relative)
+      assert_empty result.errors
+
+      # Next, pretty print the source.
+      printed = PP.pp(result.value, +"", 79)

      if File.exist?(snapshot)
-        normalized = normalize_printed(File.read(snapshot))
+        saved = File.read(snapshot)

        # If the snapshot file exists, but the printed value does not match the
        # snapshot, then update the snapshot file.
-        if normalized != printed
-          File.write(snapshot, normalized)
+        if printed != saved
+          File.write(snapshot, printed)
          warn("Updated snapshot at #{snapshot}.")
        end

        # If the snapshot file exists, then assert that the printed value
        # matches the snapshot.
-        assert_equal(normalized, printed)
+        assert_equal(saved, printed)
      else
        # If the snapshot file does not yet exist, then write it out now.
        File.write(snapshot, printed)
@ -108,11 +95,11 @@ class ParseTest < Test::Unit::TestCase

      # Next, assert that the value can be serialized and deserialized without
      # changing the shape of the tree.
-      assert_equal_nodes(value, YARP.load(source, YARP.dump(source, filepath)))
+      assert_equal_nodes(result.value, YARP.load(source, YARP.dump(source, relative)))

      # Next, assert that the newlines are in the expected places.
      expected_newlines = [0]
-      source.b.scan("\n") { expected_newlines << $~.offset(0)[0] }
+      source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
      assert_equal expected_newlines, YARP.newlines(source)

      # Finally, assert that we can lex the source and get the same tokens as
--- a/test/yarp/regexp_test.rb
+++ b/test/yarp/regexp_test.rb
@ -101,6 +101,10 @@ class RegexpTest < Test::Unit::TestCase
    refute_nil(YARP.named_captures("(?#foo)"))
  end

+  def test_comments_with_escaped_parentheses
+    refute_nil(YARP.named_captures("(?#foo\\)\\))"))
+  end
+
  def test_non_capturing_groups
    refute_nil(YARP.named_captures("(?:foo)"))
  end
--- a/test/yarp/snapshots/keyword_method_names.txt
+++ b/test/yarp/snapshots/keyword_method_names.txt
@ -102,7 +102,7 @@ ProgramNode(0...185)(
     StringNode(123...129)((123...125), (125...128), (128...129), "abc"),
     DefNode(131...149)(
       (144...145),
-       SourceFileNode(135...143)("/fixtures/keyword_method_names.txt"),
+       SourceFileNode(135...143)("keyword_method_names.txt"),
       nil,
       nil,
       [],
--- a/test/yarp/snapshots/keywords.txt
+++ b/test/yarp/snapshots/keywords.txt
@ -5,7 +5,7 @@ ProgramNode(0...51)(
     RetryNode(6...11)(),
     SelfNode(13...17)(),
     SourceEncodingNode(19...31)(),
-     SourceFileNode(33...41)("/fixtures/keywords.txt"),
+     SourceFileNode(33...41)("keywords.txt"),
     SourceLineNode(43...51)()]
  )
 )
--- a/test/yarp/snapshots/not.txt
+++ b/test/yarp/snapshots/not.txt
@ -1,6 +1,6 @@
-ProgramNode(0...125)(
+ProgramNode(0...156)(
  [],
-  StatementsNode(0...125)(
+  StatementsNode(0...156)(
    [AndNode(0...19)(
       CallNode(0...7)(
         CallNode(4...7)(nil, nil, (4...7), nil, nil, nil, nil, 0, "foo"),
@ -146,6 +146,48 @@ ProgramNode(0...125)(
         "!"
       ),
       (108...111)
+     ),
+     CallNode(127...138)(
+       CallNode(131...134)(
+         nil,
+         nil,
+         (131...134),
+         nil,
+         nil,
+         nil,
+         nil,
+         0,
+         "foo"
+       ),
+       nil,
+       (127...130),
+       (130...131),
+       nil,
+       (137...138),
+       nil,
+       0,
+       "!"
+     ),
+     CallNode(140...156)(
+       CallNode(147...150)(
+         nil,
+         nil,
+         (147...150),
+         nil,
+         nil,
+         nil,
+         nil,
+         0,
+         "foo"
+       ),
+       nil,
+       (140...143),
+       (143...144),
+       nil,
+       (155...156),
+       nil,
+       0,
+       "!"
     )]
  )
 )
--- a/test/yarp/snapshots/patterns.txt
+++ b/test/yarp/snapshots/patterns.txt
--- a/test/yarp/snapshots/unparser/corpus/literal/pragma.txt
+++ b/test/yarp/snapshots/unparser/corpus/literal/pragma.txt
@ -2,7 +2,7 @@ ProgramNode(0...38)(
  [],
  StatementsNode(0...38)(
    [SourceEncodingNode(0...12)(),
-     SourceFileNode(13...21)("/fixtures/unparser/corpus/literal/pragma.txt"),
+     SourceFileNode(13...21)("unparser/corpus/literal/pragma.txt"),
     SourceLineNode(22...30)(),
     CallNode(31...38)(nil, nil, (31...38), nil, nil, nil, nil, 0, "__dir__")]
  )
--- a/test/yarp/snapshots/whitequark/pattern_matchingFILELINE_literals.txt
+++ b/test/yarp/snapshots/whitequark/pattern_matchingFILELINE_literals.txt
@ -3,7 +3,9 @@ ProgramNode(8...111)(
  StatementsNode(8...111)(
    [CaseNode(8...111)(
       ArrayNode(13...51)(
-         [SourceFileNode(14...22)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"),
+         [SourceFileNode(14...22)(
+            "whitequark/pattern_matching__FILE__LINE_literals.txt"
+          ),
          CallNode(24...36)(
            SourceLineNode(24...32)(),
            nil,
@ -22,7 +24,9 @@ ProgramNode(8...111)(
       [InNode(62...99)(
          ArrayPatternNode(65...99)(
            nil,
-            [SourceFileNode(66...74)("/fixtures/whitequark/pattern_matching__FILE__LINE_literals.txt"),
+            [SourceFileNode(66...74)(
+               "whitequark/pattern_matching__FILE__LINE_literals.txt"
+             ),
             SourceLineNode(76...84)(),
             SourceEncodingNode(86...98)()],
            nil,
--- a/test/yarp/snapshots/whitequark/string_FILE.txt
+++ b/test/yarp/snapshots/whitequark/string_FILE.txt
@ -1,6 +1,6 @@
 ProgramNode(0...8)(
  [],
  StatementsNode(0...8)(
-    [SourceFileNode(0...8)("/fixtures/whitequark/string___FILE__.txt")]
+    [SourceFileNode(0...8)("whitequark/string___FILE__.txt")]
  )
 )
--- a/yarp/api_node.c
+++ b/yarp/api_node.c
--- a/yarp/ast.h
+++ b/yarp/ast.h
@ -9,14 +9,13 @@
 #define YARP_AST_H

 #include "yarp/defines.h"
+#include "yarp/util/yp_constant_pool.h"
+#include "yarp/util/yp_string.h"

 #include <assert.h>
 #include <stddef.h>
 #include <stdint.h>

-#include "yarp/util/yp_constant_pool.h"
-#include "yarp/util/yp_string.h"
-
 // This enum represents every type of token in the Ruby source.
 typedef enum yp_token_type {
    YP_TOKEN_EOF = 1, // final token in the file
--- a/yarp/compile.c
+++ b/yarp/compile.c
@ -1,826 +0,0 @@
-#include "yarp/extension.h"
-
-typedef enum {
-    YP_ISEQ_TYPE_TOP,
-    YP_ISEQ_TYPE_BLOCK
-} yp_iseq_type_t;
-
-typedef enum {
-    YP_RUBY_EVENT_B_CALL,
-    YP_RUBY_EVENT_B_RETURN
-} yp_ruby_event_t;
-
-typedef struct yp_iseq_compiler {
-    // This is the parent compiler. It is used to communicate between ISEQs that
-    // need to be able to jump back to the parent ISEQ.
-    struct yp_iseq_compiler *parent;
-
-    // This is the list of local variables that are defined on this scope.
-    yp_constant_id_list_t *locals;
-
-    // This is the instruction sequence that we are compiling. It's actually just
-    // a Ruby array that maps to the output of RubyVM::InstructionSequence#to_a.
-    VALUE insns;
-
-    // This is a list of IDs coming from the instructions that are being compiled.
-    // In theory they should be deterministic, but we don't have that
-    // functionality yet. Fortunately you can pass -1 for all of them and
-    // everything for the most part continues to work.
-    VALUE node_ids;
-
-    // This is the current size of the instruction sequence's stack.
-    int stack_size;
-
-    // This is the maximum size of the instruction sequence's stack.
-    int stack_max;
-
-    // This is the name of the instruction sequence.
-    const char *name;
-
-    // This is the type of the instruction sequence.
-    yp_iseq_type_t type;
-
-    // This is the optional argument information.
-    VALUE optionals;
-
-    // This is the number of arguments.
-    int arg_size;
-
-    // This is the current size of the instruction sequence's instructions and
-    // operands.
-    size_t size;
-
-    // This is the index of the current inline storage.
-    size_t inline_storage_index;
-} yp_iseq_compiler_t;
-
-static void
-yp_iseq_compiler_init(yp_iseq_compiler_t *compiler, yp_iseq_compiler_t *parent, yp_constant_id_list_t *locals, const char *name, yp_iseq_type_t type) {
-    *compiler = (yp_iseq_compiler_t) {
-        .parent = parent,
-        .locals = locals,
-        .insns = rb_ary_new(),
-        .node_ids = rb_ary_new(),
-        .stack_size = 0,
-        .stack_max = 0,
-        .name = name,
-        .type = type,
-        .optionals = rb_hash_new(),
-        .arg_size = 0,
-        .size = 0,
-        .inline_storage_index = 0
-    };
-}
-
-/******************************************************************************/
-/* Utilities                                                                  */
-/******************************************************************************/
-
-static inline int
-sizet2int(size_t value) {
-    if (value > INT_MAX) rb_raise(rb_eRuntimeError, "value too large");
-    return (int) value;
-}
-
-static int
-local_index(yp_iseq_compiler_t *compiler, yp_constant_id_t constant_id, int depth) {
-    int compiler_index;
-    yp_iseq_compiler_t *local_compiler = compiler;
-
-    for (compiler_index = 0; compiler_index < depth; compiler_index++) {
-        local_compiler = local_compiler->parent;
-        assert(local_compiler != NULL);
-    }
-
-    size_t index;
-    for (index = 0; index < local_compiler->locals->size; index++) {
-        if (local_compiler->locals->ids[index] == constant_id) {
-            return sizet2int(local_compiler->locals->size - index + 2);
-        }
-    }
-
-    return -1;
-}
-
-/******************************************************************************/
-/* Parse specific VALUEs from strings                                         */
-/******************************************************************************/
-
-static VALUE
-parse_number(const char *start, const char *end) {
-    size_t length = end - start;
-
-    char *buffer = alloca(length + 1);
-    memcpy(buffer, start, length);
-
-    buffer[length] = '\0';
-    return rb_cstr_to_inum(buffer, -10, Qfalse);
-}
-
-static inline VALUE
-parse_string(yp_string_t *string) {
-    return rb_str_new(yp_string_source(string), yp_string_length(string));
-}
-
-static inline ID
-parse_symbol(const char *start, const char *end) {
-    return rb_intern2(start, end - start);
-}
-
-static inline ID
-parse_location_symbol(yp_location_t *location) {
-    return parse_symbol(location->start, location->end);
-}
-
-static inline ID
-parse_node_symbol(yp_node_t *node) {
-    return parse_symbol(node->location.start, node->location.end);
-}
-
-static inline ID
-parse_string_symbol(yp_string_t *string) {
-    const char *start = yp_string_source(string);
-    return parse_symbol(start, start + yp_string_length(string));
-}
-
-/******************************************************************************/
-/* Create Ruby objects for compilation                                        */
-/******************************************************************************/
-
-static VALUE
-yp_iseq_new(yp_iseq_compiler_t *compiler) {
-    VALUE code_location = rb_ary_new_capa(4);
-    rb_ary_push(code_location, INT2FIX(1));
-    rb_ary_push(code_location, INT2FIX(0));
-    rb_ary_push(code_location, INT2FIX(1));
-    rb_ary_push(code_location, INT2FIX(0));
-
-    VALUE data = rb_hash_new();
-    rb_hash_aset(data, ID2SYM(rb_intern("arg_size")), INT2FIX(compiler->arg_size));
-    rb_hash_aset(data, ID2SYM(rb_intern("local_size")), INT2FIX(0));
-    rb_hash_aset(data, ID2SYM(rb_intern("stack_max")), INT2FIX(compiler->stack_max));
-    rb_hash_aset(data, ID2SYM(rb_intern("node_id")), INT2FIX(-1));
-    rb_hash_aset(data, ID2SYM(rb_intern("code_location")), code_location);
-    rb_hash_aset(data, ID2SYM(rb_intern("node_ids")), compiler->node_ids);
-
-    VALUE type = Qnil;
-    switch (compiler->type) {
-        case YP_ISEQ_TYPE_TOP:
-            type = ID2SYM(rb_intern("top"));
-            break;
-        case YP_ISEQ_TYPE_BLOCK:
-            type = ID2SYM(rb_intern("block"));
-            break;
-    }
-
-    VALUE iseq = rb_ary_new_capa(13);
-    rb_ary_push(iseq, rb_str_new_cstr("YARVInstructionSequence/SimpleDataFormat"));
-    rb_ary_push(iseq, INT2FIX(3));
-    rb_ary_push(iseq, INT2FIX(3));
-    rb_ary_push(iseq, INT2FIX(1));
-    rb_ary_push(iseq, data);
-    rb_ary_push(iseq, rb_str_new_cstr(compiler->name));
-    rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));
-    rb_ary_push(iseq, rb_str_new_cstr("<compiled>"));
-    rb_ary_push(iseq, INT2FIX(1));
-    rb_ary_push(iseq, type);
-    rb_ary_push(iseq, rb_ary_new());
-    rb_ary_push(iseq, compiler->optionals);
-    rb_ary_push(iseq, rb_ary_new());
-    rb_ary_push(iseq, compiler->insns);
-
-    return iseq;
-}
-
-// static const int YP_CALLDATA_ARGS_SPLAT = 1 << 0;
-// static const int YP_CALLDATA_ARGS_BLOCKARG = 1 << 1;
-static const int YP_CALLDATA_FCALL = 1 << 2;
-static const int YP_CALLDATA_VCALL = 1 << 3;
-static const int YP_CALLDATA_ARGS_SIMPLE = 1 << 4;
-// static const int YP_CALLDATA_BLOCKISEQ = 1 << 5;
-// static const int YP_CALLDATA_KWARG = 1 << 6;
-// static const int YP_CALLDATA_KW_SPLAT = 1 << 7;
-// static const int YP_CALLDATA_TAILCALL = 1 << 8;
-// static const int YP_CALLDATA_SUPER = 1 << 9;
-// static const int YP_CALLDATA_ZSUPER = 1 << 10;
-// static const int YP_CALLDATA_OPT_SEND = 1 << 11;
-// static const int YP_CALLDATA_KW_SPLAT_MUT = 1 << 12;
-
-static VALUE
-yp_calldata_new(ID mid, int flag, size_t orig_argc) {
-    VALUE calldata = rb_hash_new();
-
-    rb_hash_aset(calldata, ID2SYM(rb_intern("mid")), ID2SYM(mid));
-    rb_hash_aset(calldata, ID2SYM(rb_intern("flag")), INT2FIX(flag));
-    rb_hash_aset(calldata, ID2SYM(rb_intern("orig_argc")), INT2FIX(orig_argc));
-
-    return calldata;
-}
-
-static inline VALUE
-yp_inline_storage_new(yp_iseq_compiler_t *compiler) {
-    return INT2FIX(compiler->inline_storage_index++);
-}
-
-/******************************************************************************/
-/* Push instructions onto a compiler                                          */
-/******************************************************************************/
-
-static VALUE
-push_insn(yp_iseq_compiler_t *compiler, int stack_change, size_t size, ...) {
-    va_list opnds;
-    va_start(opnds, size);
-
-    VALUE insn = rb_ary_new_capa(size);
-    for (size_t index = 0; index < size; index++) {
-        rb_ary_push(insn, va_arg(opnds, VALUE));
-    }
-
-    va_end(opnds);
-
-    compiler->stack_size += stack_change;
-    if (compiler->stack_size > compiler->stack_max) {
-        compiler->stack_max = compiler->stack_size;
-    }
-
-    compiler->size += size;
-    rb_ary_push(compiler->insns, insn);
-    rb_ary_push(compiler->node_ids, INT2FIX(-1));
-
-    return insn;
-}
-
-static VALUE
-push_label(yp_iseq_compiler_t *compiler) {
-    VALUE label = ID2SYM(rb_intern_str(rb_sprintf("label_%zu", compiler->size)));
-    rb_ary_push(compiler->insns, label);
-    return label;
-}
-
-static void
-push_ruby_event(yp_iseq_compiler_t *compiler, yp_ruby_event_t event) {
-    switch (event) {
-        case YP_RUBY_EVENT_B_CALL:
-            rb_ary_push(compiler->insns, ID2SYM(rb_intern("RUBY_EVENT_B_CALL")));
-            break;
-        case YP_RUBY_EVENT_B_RETURN:
-            rb_ary_push(compiler->insns, ID2SYM(rb_intern("RUBY_EVENT_B_RETURN")));
-            break;
-    }
-}
-
-static inline VALUE
-push_anytostring(yp_iseq_compiler_t *compiler) {
-    return push_insn(compiler, -2 + 1, 1, ID2SYM(rb_intern("anytostring")));
-}
-
-static inline VALUE
-push_branchif(yp_iseq_compiler_t *compiler, VALUE label) {
-    return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("branchif")), label);
-}
-
-static inline VALUE
-push_branchunless(yp_iseq_compiler_t *compiler, VALUE label) {
-    return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("branchunless")), label);
-}
-
-static inline VALUE
-push_concatstrings(yp_iseq_compiler_t *compiler, int count) {
-    return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("concatstrings")), INT2FIX(count));
-}
-
-static inline VALUE
-push_dup(yp_iseq_compiler_t *compiler) {
-    return push_insn(compiler, -1 + 2, 1, ID2SYM(rb_intern("dup")));
-}
-
-static inline VALUE
-push_getclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
-    return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getclassvariable")), name, inline_storage);
-}
-
-static inline VALUE
-push_getconstant(yp_iseq_compiler_t *compiler, VALUE name) {
-    return push_insn(compiler, -2 + 1, 2, ID2SYM(rb_intern("getconstant")), name);
-}
-
-static inline VALUE
-push_getglobal(yp_iseq_compiler_t *compiler, VALUE name) {
-    return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("getglobal")), name);
-}
-
-static inline VALUE
-push_getinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
-    return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getinstancevariable")), name, inline_storage);
-}
-
-static inline VALUE
-push_getlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
-    return push_insn(compiler, -0 + 1, 3, ID2SYM(rb_intern("getlocal")), index, depth);
-}
-
-static inline VALUE
-push_leave(yp_iseq_compiler_t *compiler) {
-    return push_insn(compiler, -1 + 0, 1, ID2SYM(rb_intern("leave")));
-}
-
-static inline VALUE
-push_newarray(yp_iseq_compiler_t *compiler, int count) {
-    return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("newarray")), INT2FIX(count));
-}
-
-static inline VALUE
-push_newhash(yp_iseq_compiler_t *compiler, int count) {
-    return push_insn(compiler, -count + 1, 2, ID2SYM(rb_intern("newhash")), INT2FIX(count));
-}
-
-static inline VALUE
-push_newrange(yp_iseq_compiler_t *compiler, VALUE flag) {
-    return push_insn(compiler, -2 + 1, 2, ID2SYM(rb_intern("newrange")), flag);
-}
-
-static inline VALUE
-push_nop(yp_iseq_compiler_t *compiler) {
-    return push_insn(compiler, -2 + 1, 1, ID2SYM(rb_intern("nop")));
-}
-
-static inline VALUE
-push_objtostring(yp_iseq_compiler_t *compiler, VALUE calldata) {
-    return push_insn(compiler, -1 + 1, 2, ID2SYM(rb_intern("objtostring")), calldata);
-}
-
-static inline VALUE
-push_pop(yp_iseq_compiler_t *compiler) {
-    return push_insn(compiler, -1 + 0, 1, ID2SYM(rb_intern("pop")));
-}
-
-static inline VALUE
-push_putnil(yp_iseq_compiler_t *compiler) {
-    return push_insn(compiler, -0 + 1, 1, ID2SYM(rb_intern("putnil")));
-}
-
-static inline VALUE
-push_putobject(yp_iseq_compiler_t *compiler, VALUE value) {
-    return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putobject")), value);
-}
-
-static inline VALUE
-push_putself(yp_iseq_compiler_t *compiler) {
-    return push_insn(compiler, -0 + 1, 1, ID2SYM(rb_intern("putself")));
-}
-
-static inline VALUE
-push_setlocal(yp_iseq_compiler_t *compiler, VALUE index, VALUE depth) {
-    return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setlocal")), index, depth);
-}
-
-static const VALUE YP_SPECIALOBJECT_VMCORE = INT2FIX(1);
-static const VALUE YP_SPECIALOBJECT_CBASE = INT2FIX(2);
-// static const VALUE YP_SPECIALOBJECT_CONST_BASE = INT2FIX(3);
-
-static inline VALUE
-push_putspecialobject(yp_iseq_compiler_t *compiler, VALUE object) {
-    return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putspecialobject")), object);
-}
-
-static inline VALUE
-push_putstring(yp_iseq_compiler_t *compiler, VALUE string) {
-    return push_insn(compiler, -0 + 1, 2, ID2SYM(rb_intern("putstring")), string);
-}
-
-static inline VALUE
-push_send(yp_iseq_compiler_t *compiler, int stack_change, VALUE calldata, VALUE block_iseq) {
-    return push_insn(compiler, stack_change, 3, ID2SYM(rb_intern("send")), calldata, block_iseq);
-}
-
-static inline VALUE
-push_setclassvariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
-    return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setclassvariable")), name, inline_storage);
-}
-
-static inline VALUE
-push_setglobal(yp_iseq_compiler_t *compiler, VALUE name) {
-    return push_insn(compiler, -1 + 0, 2, ID2SYM(rb_intern("setglobal")), name);
-}
-
-static inline VALUE
-push_setinstancevariable(yp_iseq_compiler_t *compiler, VALUE name, VALUE inline_storage) {
-    return push_insn(compiler, -1 + 0, 3, ID2SYM(rb_intern("setinstancevariable")), name, inline_storage);
-}
-
-/******************************************************************************/
-/* Compile an AST node using the given compiler                               */
-/******************************************************************************/
-
-static void
-yp_compile_node(yp_iseq_compiler_t *compiler, yp_node_t *base_node) {
-    switch (base_node->type) {
-        case YP_NODE_ALIAS_NODE: {
-            yp_alias_node_t *node = (yp_alias_node_t *) base_node;
-
-            push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
-            push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
-            yp_compile_node(compiler, node->new_name);
-            yp_compile_node(compiler, node->old_name);
-            push_send(compiler, -3, yp_calldata_new(rb_intern("core#set_method_alias"), YP_CALLDATA_ARGS_SIMPLE, 3), Qnil);
-
-            return;
-        }
-        case YP_NODE_AND_NODE: {
-            yp_and_node_t *node = (yp_and_node_t *) base_node;
-
-            yp_compile_node(compiler, node->left);
-            push_dup(compiler);
-            VALUE branchunless = push_branchunless(compiler, Qnil);
-
-            push_pop(compiler);
-            yp_compile_node(compiler, node->right);
-
-            VALUE label = push_label(compiler);
-            rb_ary_store(branchunless, 1, label);
-
-            return;
-        }
-        case YP_NODE_ARGUMENTS_NODE: {
-            yp_arguments_node_t *node = (yp_arguments_node_t *) base_node;
-            yp_node_list_t node_list = node->arguments;
-            for (size_t index = 0; index < node_list.size; index++) {
-                yp_compile_node(compiler, node_list.nodes[index]);
-            }
-            return;
-        }
-        case YP_NODE_ARRAY_NODE: {
-            yp_array_node_t *node = (yp_array_node_t *) base_node;
-            yp_node_list_t elements = node->elements;
-            for (size_t index = 0; index < elements.size; index++) {
-                yp_compile_node(compiler, elements.nodes[index]);
-            }
-            push_newarray(compiler, sizet2int(elements.size));
-            return;
-        }
-        case YP_NODE_ASSOC_NODE: {
-            yp_assoc_node_t *node = (yp_assoc_node_t *) base_node;
-            yp_compile_node(compiler, node->key);
-            yp_compile_node(compiler, node->value);
-            return;
-        }
-        case YP_NODE_BLOCK_NODE: {
-            yp_block_node_t *node = (yp_block_node_t *) base_node;
-
-            VALUE optional_labels = rb_ary_new();
-            if (node->parameters &&
-                    node->parameters->parameters &&
-                    node->parameters->parameters->optionals.size > 0) {
-                compiler->arg_size += node->parameters->parameters->optionals.size;
-
-                yp_node_list_t *optionals = &node->parameters->parameters->optionals;
-                for (size_t i = 0; i < optionals->size; i++) {
-                    VALUE label = push_label(compiler);
-                    rb_ary_push(optional_labels, label);
-                    yp_compile_node(compiler, optionals->nodes[i]);
-                }
-                VALUE label = push_label(compiler);
-                rb_ary_push(optional_labels, label);
-                rb_hash_aset(compiler->optionals, ID2SYM(rb_intern("opt")), optional_labels);
-
-                push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
-                push_nop(compiler);
-            } else {
-                push_ruby_event(compiler, YP_RUBY_EVENT_B_CALL);
-            }
-
-
-
-            if (node->statements) {
-                yp_compile_node(compiler, node->statements);
-            } else {
-                push_putnil(compiler);
-            }
-            push_ruby_event(compiler, YP_RUBY_EVENT_B_RETURN);
-            push_leave(compiler);
-            return;
-        }
-        case YP_NODE_CALL_NODE: {
-            yp_call_node_t *node = (yp_call_node_t *) base_node;
-
-            ID mid = parse_location_symbol(&node->message_loc);
-            int flags = 0;
-            size_t orig_argc;
-
-            if (node->receiver == NULL) {
-                push_putself(compiler);
-            } else {
-                yp_compile_node(compiler, node->receiver);
-            }
-
-            if (node->arguments == NULL) {
-                if (flags & YP_CALLDATA_FCALL) flags |= YP_CALLDATA_VCALL;
-                orig_argc = 0;
-            } else {
-                yp_arguments_node_t *arguments = node->arguments;
-                yp_compile_node(compiler, (yp_node_t *) arguments);
-                orig_argc = arguments->arguments.size;
-            }
-
-            VALUE block_iseq = Qnil;
-            if (node->block != NULL) {
-                yp_iseq_compiler_t block_compiler;
-                yp_iseq_compiler_init(
-                    &block_compiler,
-                    compiler,
-                    &node->block->locals,
-                    "block in <compiled>",
-                    YP_ISEQ_TYPE_BLOCK
-                );
-
-                yp_compile_node(&block_compiler, (yp_node_t *) node->block);
-                block_iseq = yp_iseq_new(&block_compiler);
-            }
-
-            if (block_iseq == Qnil && flags == 0) {
-                flags |= YP_CALLDATA_ARGS_SIMPLE;
-            }
-
-            if (node->receiver == NULL) {
-                flags |= YP_CALLDATA_FCALL;
-
-                if (block_iseq == Qnil && node->arguments == NULL) {
-                    flags |= YP_CALLDATA_VCALL;
-                }
-            }
-
-            push_send(compiler, -sizet2int(orig_argc), yp_calldata_new(mid, flags, orig_argc), block_iseq);
-            return;
-        }
-        case YP_NODE_CLASS_VARIABLE_READ_NODE: {
-            yp_class_variable_read_node_t *node = (yp_class_variable_read_node_t *) base_node;
-            push_getclassvariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node)), yp_inline_storage_new(compiler));
-            return;
-        }
-        case YP_NODE_CLASS_VARIABLE_WRITE_NODE: {
-            yp_class_variable_write_node_t *node = (yp_class_variable_write_node_t *) base_node;
-            if (node->value == NULL) {
-                rb_raise(rb_eNotImpError, "class variable write without value not implemented");
-            }
-
-            yp_compile_node(compiler, node->value);
-            push_dup(compiler);
-            push_setclassvariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
-            return;
-        }
-        case YP_NODE_CONSTANT_PATH_NODE: {
-            yp_constant_path_node_t *node = (yp_constant_path_node_t *) base_node;
-            yp_compile_node(compiler, node->parent);
-            push_putobject(compiler, Qfalse);
-            push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) node->child)));
-            return;
-        }
-        case YP_NODE_CONSTANT_READ_NODE:
-            push_putnil(compiler);
-            push_putobject(compiler, Qtrue);
-            push_getconstant(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)));
-            return;
-        case YP_NODE_EMBEDDED_STATEMENTS_NODE: {
-            yp_embedded_statements_node_t *node = (yp_embedded_statements_node_t *) base_node;
-            yp_compile_node(compiler, (yp_node_t *) node->statements);
-            return;
-        }
-        case YP_NODE_FALSE_NODE:
-            push_putobject(compiler, Qfalse);
-            return;
-        case YP_NODE_GLOBAL_VARIABLE_READ_NODE:
-            push_getglobal(compiler, ID2SYM(parse_location_symbol(&base_node->location)));
-            return;
-        case YP_NODE_GLOBAL_VARIABLE_WRITE_NODE: {
-            yp_global_variable_write_node_t *node = (yp_global_variable_write_node_t *) base_node;
-
-            if (node->value == NULL) {
-                rb_raise(rb_eNotImpError, "global variable write without value not implemented");
-            }
-
-            yp_compile_node(compiler, node->value);
-            push_dup(compiler);
-            push_setglobal(compiler, ID2SYM(parse_location_symbol(&node->name_loc)));
-            return;
-        }
-        case YP_NODE_HASH_NODE: {
-            yp_hash_node_t *node = (yp_hash_node_t *) base_node;
-            yp_node_list_t elements = node->elements;
-
-            for (size_t index = 0; index < elements.size; index++) {
-                yp_compile_node(compiler, elements.nodes[index]);
-            }
-
-            push_newhash(compiler, sizet2int(elements.size * 2));
-            return;
-        }
-        case YP_NODE_INSTANCE_VARIABLE_READ_NODE:
-            push_getinstancevariable(compiler, ID2SYM(parse_node_symbol((yp_node_t *) base_node)), yp_inline_storage_new(compiler));
-            return;
-        case YP_NODE_INSTANCE_VARIABLE_WRITE_NODE: {
-            yp_instance_variable_write_node_t *node = (yp_instance_variable_write_node_t *) base_node;
-
-            if (node->value == NULL) {
-                rb_raise(rb_eNotImpError, "instance variable write without value not implemented");
-            }
-
-            yp_compile_node(compiler, node->value);
-            push_dup(compiler);
-            push_setinstancevariable(compiler, ID2SYM(parse_location_symbol(&node->name_loc)), yp_inline_storage_new(compiler));
-            return;
-        }
-        case YP_NODE_INTEGER_NODE:
-            push_putobject(compiler, parse_number(base_node->location.start, base_node->location.end));
-            return;
-        case YP_NODE_INTERPOLATED_STRING_NODE: {
-            yp_interpolated_string_node_t *node = (yp_interpolated_string_node_t *) base_node;
-
-            for (size_t index = 0; index < node->parts.size; index++) {
-                yp_node_t *part = node->parts.nodes[index];
-
-                switch (part->type) {
-                    case YP_NODE_STRING_NODE: {
-                        yp_string_node_t *string_node = (yp_string_node_t *) part;
-                        push_putobject(compiler, parse_string(&string_node->unescaped));
-                        break;
-                    }
-                    default:
-                        yp_compile_node(compiler, part);
-                        push_dup(compiler);
-                        push_objtostring(compiler, yp_calldata_new(rb_intern("to_s"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 0));
-                        push_anytostring(compiler);
-                        break;
-                }
-            }
-
-            push_concatstrings(compiler, sizet2int(node->parts.size));
-            return;
-        }
-        case YP_NODE_KEYWORD_HASH_NODE: {
-            yp_keyword_hash_node_t *node = (yp_keyword_hash_node_t *) base_node;
-            yp_node_list_t elements = node->elements;
-
-            for (size_t index = 0; index < elements.size; index++) {
-                yp_compile_node(compiler, elements.nodes[index]);
-            }
-
-            push_newhash(compiler, sizet2int(elements.size * 2));
-            return;
-        }
-        case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
-            yp_local_variable_read_node_t *node = (yp_local_variable_read_node_t *) base_node;
-            int index = local_index(compiler, node->constant_id, node->depth);
-
-            push_getlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
-            return;
-        }
-        case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
-            yp_local_variable_write_node_t *node = (yp_local_variable_write_node_t *) base_node;
-
-            if (node->value == NULL) {
-                rb_raise(rb_eNotImpError, "local variable write without value not implemented");
-            }
-
-            int index = local_index(compiler, node->constant_id, node->depth);
-
-            yp_compile_node(compiler, node->value);
-            push_dup(compiler);
-            push_setlocal(compiler, INT2FIX(index), INT2FIX(node->depth));
-            return;
-        }
-        case YP_NODE_NIL_NODE:
-            push_putnil(compiler);
-            return;
-        case YP_NODE_OR_NODE: {
-            yp_or_node_t *node = (yp_or_node_t *) base_node;
-
-            yp_compile_node(compiler, node->left);
-            push_dup(compiler);
-            VALUE branchif = push_branchif(compiler, Qnil);
-
-            push_pop(compiler);
-            yp_compile_node(compiler, node->right);
-
-            VALUE label = push_label(compiler);
-            rb_ary_store(branchif, 1, label);
-
-            return;
-        }
-        case YP_NODE_PARENTHESES_NODE: {
-            yp_parentheses_node_t *node = (yp_parentheses_node_t *) base_node;
-
-            if (node->statements == NULL) {
-                push_putnil(compiler);
-            } else {
-                yp_compile_node(compiler, node->statements);
-            }
-
-            return;
-        }
-        case YP_NODE_PROGRAM_NODE: {
-            yp_program_node_t *node = (yp_program_node_t *) base_node;
-
-            if (node->statements->body.size == 0) {
-                push_putnil(compiler);
-            } else {
-                yp_compile_node(compiler, (yp_node_t *) node->statements);
-            }
-
-            push_leave(compiler);
-            return;
-        }
-        case YP_NODE_RANGE_NODE: {
-            yp_range_node_t *node = (yp_range_node_t *) base_node;
-
-            if (node->left == NULL) {
-                push_putnil(compiler);
-            } else {
-                yp_compile_node(compiler, node->left);
-            }
-
-            if (node->right == NULL) {
-                push_putnil(compiler);
-            } else {
-                yp_compile_node(compiler, node->right);
-            }
-
-            push_newrange(compiler, INT2FIX((node->operator_loc.end - node->operator_loc.start) == 3));
-            return;
-        }
-        case YP_NODE_SELF_NODE:
-            push_putself(compiler);
-            return;
-        case YP_NODE_STATEMENTS_NODE: {
-            yp_statements_node_t *node = (yp_statements_node_t *) base_node;
-            yp_node_list_t node_list = node->body;
-            for (size_t index = 0; index < node_list.size; index++) {
-                yp_compile_node(compiler, node_list.nodes[index]);
-                if (index < node_list.size - 1) push_pop(compiler);
-            }
-            return;
-        }
-        case YP_NODE_STRING_NODE: {
-            yp_string_node_t *node = (yp_string_node_t *) base_node;
-            push_putstring(compiler, parse_string(&node->unescaped));
-            return;
-        }
-        case YP_NODE_SYMBOL_NODE: {
-            yp_symbol_node_t *node = (yp_symbol_node_t *) base_node;
-            push_putobject(compiler, ID2SYM(parse_string_symbol(&node->unescaped)));
-            return;
-        }
-        case YP_NODE_TRUE_NODE:
-            push_putobject(compiler, Qtrue);
-            return;
-        case YP_NODE_UNDEF_NODE: {
-            yp_undef_node_t *node = (yp_undef_node_t *) base_node;
-
-            for (size_t index = 0; index < node->names.size; index++) {
-                push_putspecialobject(compiler, YP_SPECIALOBJECT_VMCORE);
-                push_putspecialobject(compiler, YP_SPECIALOBJECT_CBASE);
-                yp_compile_node(compiler, node->names.nodes[index]);
-                push_send(compiler, -2, yp_calldata_new(rb_intern("core#undef_method"), YP_CALLDATA_ARGS_SIMPLE, 2), Qnil);
-
-                if (index < node->names.size - 1) push_pop(compiler);
-            }
-
-            return;
-        }
-        case YP_NODE_X_STRING_NODE: {
-            yp_x_string_node_t *node = (yp_x_string_node_t *) base_node;
-            push_putself(compiler);
-            push_putobject(compiler, parse_string(&node->unescaped));
-            push_send(compiler, -1, yp_calldata_new(rb_intern("`"), YP_CALLDATA_FCALL | YP_CALLDATA_ARGS_SIMPLE, 1), Qnil);
-            return;
-        }
-        case YP_NODE_OPTIONAL_PARAMETER_NODE: {
-            yp_optional_parameter_node_t *node = (yp_optional_parameter_node_t *) base_node;
-            int depth = 0;
-            int index = local_index(compiler, node->constant_id, depth);
-            yp_compile_node(compiler, node->value);
-            push_setlocal(compiler, INT2FIX(index), INT2FIX(depth));
-            break;
-        }
-        default:
-            rb_raise(rb_eNotImpError, "node type %d not implemented", base_node->type);
-            return;
-    }
-}
-
-// This function compiles the given node into a list of instructions.
-VALUE
-yp_compile(yp_node_t *node) {
-    assert(node->type == YP_NODE_PROGRAM_NODE);
-
-    yp_iseq_compiler_t compiler;
-    yp_iseq_compiler_init(
-        &compiler,
-        NULL,
-        &((yp_program_node_t *) node)->locals,
-        "<compiled>",
-        YP_ISEQ_TYPE_TOP
-    );
-
-    yp_compile_node(&compiler, node);
-    return yp_iseq_new(&compiler);
-}
--- a/yarp/config.h
+++ b/yarp/config.h
@ -0,0 +1 @@
+#include "ruby/config.h"
--- a/yarp/defines.h
+++ b/yarp/defines.h
@ -1,8 +1,20 @@
 #ifndef YARP_DEFINES_H
 #define YARP_DEFINES_H

+// This file should be included first by any *.h or *.c in YARP
+
+#include "yarp/config.h"
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
 // YP_EXPORTED_FUNCTION
-#if defined(_WIN32)
+#if defined(YP_STATIC)
+#   define YP_EXPORTED_FUNCTION
+#elif defined(_WIN32)
 #   define YP_EXPORTED_FUNCTION __declspec(dllexport) extern
 #else
 #   ifndef YP_EXPORTED_FUNCTION
@ -16,9 +28,9 @@

 // YP_ATTRIBUTE_UNUSED
 #if defined(__GNUC__)
-# define YP_ATTRIBUTE_UNUSED __attribute__((unused))
+#   define YP_ATTRIBUTE_UNUSED __attribute__((unused))
 #else
-# define YP_ATTRIBUTE_UNUSED
+#   define YP_ATTRIBUTE_UNUSED
 #endif

 // inline
@ -26,4 +38,13 @@
 #   define inline __inline
 #endif

+int yp_strncasecmp(const char *string1, const char *string2, size_t length);
+
+int yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...);
+
+#if defined(HAVE_SNPRINTF)
+    // We use snprintf if it's available
+#   define yp_snprintf snprintf
+#endif
+
 #endif
--- a/yarp/diagnostic.h
+++ b/yarp/diagnostic.h
@ -2,12 +2,11 @@
 #define YARP_DIAGNOSTIC_H

 #include "yarp/defines.h"
+#include "yarp/util/yp_list.h"

 #include <stdbool.h>
 #include <stdlib.h>

-#include "yarp/util/yp_list.h"
-
 // This struct represents a diagnostic found during parsing.
 typedef struct {
    yp_list_node_t node;
--- a/yarp/enc/yp_ascii.c
+++ b/yarp/enc/yp_ascii.c
@ -51,7 +51,8 @@ yp_encoding_t yp_encoding_ascii = {
    .char_width = yp_encoding_ascii_char_width,
    .alnum_char = yp_encoding_ascii_alnum_char,
    .alpha_char = yp_encoding_ascii_alpha_char,
-    .isupper_char = yp_encoding_ascii_isupper_char
+    .isupper_char = yp_encoding_ascii_isupper_char,
+    .multibyte = false
 };

 yp_encoding_t yp_encoding_ascii_8bit = {
@ -60,4 +61,5 @@ yp_encoding_t yp_encoding_ascii_8bit = {
    .alnum_char = yp_encoding_ascii_alnum_char,
    .alpha_char = yp_encoding_ascii_alpha_char,
    .isupper_char = yp_encoding_ascii_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_big5.c
+++ b/yarp/enc/yp_big5.c
@ -74,5 +74,6 @@ yp_encoding_t yp_encoding_big5 = {
    .char_width = yp_encoding_big5_char_width,
    .alnum_char = yp_encoding_big5_alnum_char,
    .alpha_char = yp_encoding_big5_alpha_char,
-    .isupper_char = yp_encoding_big5_isupper_char
+    .isupper_char = yp_encoding_big5_isupper_char,
+    .multibyte = true
 };
--- a/yarp/enc/yp_encoding.h
+++ b/yarp/enc/yp_encoding.h
@ -12,11 +12,28 @@
 // Each callback should return the number of bytes, or 0 if the next bytes are
 // invalid for the encoding and type.
 typedef struct {
-    const char *name;
+    // Return the number of bytes that the next character takes if it is valid
+    // in the encoding.
    size_t (*char_width)(const char *c);
+
+    // Return the number of bytes that the next character takes if it is valid
+    // in the encoding and is alphabetical.
    size_t (*alpha_char)(const char *c);
+
+    // Return the number of bytes that the next character takes if it is valid
+    // in the encoding and is alphanumeric.
    size_t (*alnum_char)(const char *c);
+
+    // Return true if the next character is valid in the encoding and is an
+    // uppercase character.
    bool (*isupper_char)(const char *c);
+
+    // The name of the encoding. This should correspond to a value that can be
+    // passed to Encoding.find in Ruby.
+    const char *name;
+
+    // True if the encoding is a multibyte encoding.
+    bool multibyte;
 } yp_encoding_t;

 // These bits define the location of each bit of metadata within the various
--- a/yarp/enc/yp_euc_jp.c
+++ b/yarp/enc/yp_euc_jp.c
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_euc_jp = {
    .char_width = yp_encoding_euc_jp_char_width,
    .alnum_char = yp_encoding_euc_jp_alnum_char,
    .alpha_char = yp_encoding_euc_jp_alpha_char,
-    .isupper_char = yp_encoding_euc_jp_isupper_char
+    .isupper_char = yp_encoding_euc_jp_isupper_char,
+    .multibyte = true
 };
--- a/yarp/enc/yp_gbk.c
+++ b/yarp/enc/yp_gbk.c
@ -80,5 +80,6 @@ yp_encoding_t yp_encoding_gbk = {
    .char_width = yp_encoding_gbk_char_width,
    .alnum_char = yp_encoding_gbk_alnum_char,
    .alpha_char = yp_encoding_gbk_alpha_char,
-    .isupper_char = yp_encoding_gbk_isupper_char
+    .isupper_char = yp_encoding_gbk_isupper_char,
+    .multibyte = true
 };
--- a/yarp/enc/yp_iso_8859_1.c
+++ b/yarp/enc/yp_iso_8859_1.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_1 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_1_alnum_char,
    .alpha_char = yp_encoding_iso_8859_1_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_1_isupper_char
+    .isupper_char = yp_encoding_iso_8859_1_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_10.c
+++ b/yarp/enc/yp_iso_8859_10.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_10 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_10_alnum_char,
    .alpha_char = yp_encoding_iso_8859_10_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_10_isupper_char
+    .isupper_char = yp_encoding_iso_8859_10_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_11.c
+++ b/yarp/enc/yp_iso_8859_11.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_11 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_11_alnum_char,
    .alpha_char = yp_encoding_iso_8859_11_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_11_isupper_char
+    .isupper_char = yp_encoding_iso_8859_11_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_13.c
+++ b/yarp/enc/yp_iso_8859_13.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_13 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_13_alnum_char,
    .alpha_char = yp_encoding_iso_8859_13_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_13_isupper_char
+    .isupper_char = yp_encoding_iso_8859_13_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_14.c
+++ b/yarp/enc/yp_iso_8859_14.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_14 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_14_alnum_char,
    .alpha_char = yp_encoding_iso_8859_14_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_14_isupper_char
+    .isupper_char = yp_encoding_iso_8859_14_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_15.c
+++ b/yarp/enc/yp_iso_8859_15.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_15 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_15_alnum_char,
    .alpha_char = yp_encoding_iso_8859_15_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_15_isupper_char
+    .isupper_char = yp_encoding_iso_8859_15_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_16.c
+++ b/yarp/enc/yp_iso_8859_16.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_16 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_16_alnum_char,
    .alpha_char = yp_encoding_iso_8859_16_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_16_isupper_char
+    .isupper_char = yp_encoding_iso_8859_16_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_2.c
+++ b/yarp/enc/yp_iso_8859_2.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_2 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_2_alnum_char,
    .alpha_char = yp_encoding_iso_8859_2_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_2_isupper_char
+    .isupper_char = yp_encoding_iso_8859_2_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_3.c
+++ b/yarp/enc/yp_iso_8859_3.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_3 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_3_alnum_char,
    .alpha_char = yp_encoding_iso_8859_3_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_3_isupper_char
+    .isupper_char = yp_encoding_iso_8859_3_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_4.c
+++ b/yarp/enc/yp_iso_8859_4.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_4 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_4_alnum_char,
    .alpha_char = yp_encoding_iso_8859_4_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_4_isupper_char
+    .isupper_char = yp_encoding_iso_8859_4_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_5.c
+++ b/yarp/enc/yp_iso_8859_5.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_5 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_5_alnum_char,
    .alpha_char = yp_encoding_iso_8859_5_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_5_isupper_char
+    .isupper_char = yp_encoding_iso_8859_5_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_6.c
+++ b/yarp/enc/yp_iso_8859_6.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_6 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_6_alnum_char,
    .alpha_char = yp_encoding_iso_8859_6_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_6_isupper_char
+    .isupper_char = yp_encoding_iso_8859_6_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_7.c
+++ b/yarp/enc/yp_iso_8859_7.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_7 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_7_alnum_char,
    .alpha_char = yp_encoding_iso_8859_7_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_7_isupper_char
+    .isupper_char = yp_encoding_iso_8859_7_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_8.c
+++ b/yarp/enc/yp_iso_8859_8.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_8 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_8_alnum_char,
    .alpha_char = yp_encoding_iso_8859_8_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_8_isupper_char
+    .isupper_char = yp_encoding_iso_8859_8_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_iso_8859_9.c
+++ b/yarp/enc/yp_iso_8859_9.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_iso_8859_9 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_iso_8859_9_alnum_char,
    .alpha_char = yp_encoding_iso_8859_9_alpha_char,
-    .isupper_char = yp_encoding_iso_8859_9_isupper_char
+    .isupper_char = yp_encoding_iso_8859_9_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_koi8_r.c
+++ b/yarp/enc/yp_koi8_r.c
@ -51,5 +51,6 @@ yp_encoding_t yp_encoding_koi8_r = {
    .char_width = yp_encoding_koi8_r_char_width,
    .alnum_char = yp_encoding_koi8_r_alnum_char,
    .alpha_char = yp_encoding_koi8_r_alpha_char,
-    .isupper_char = yp_encoding_koi8_r_isupper_char
+    .isupper_char = yp_encoding_koi8_r_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_shift_jis.c
+++ b/yarp/enc/yp_shift_jis.c
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_shift_jis = {
    .char_width = yp_encoding_shift_jis_char_width,
    .alnum_char = yp_encoding_shift_jis_alnum_char,
    .alpha_char = yp_encoding_shift_jis_alpha_char,
-    .isupper_char = yp_encoding_shift_jis_isupper_char
+    .isupper_char = yp_encoding_shift_jis_isupper_char,
+    .multibyte = true
 };
--- a/yarp/enc/yp_unicode.c
+++ b/yarp/enc/yp_unicode.c
@ -2230,7 +2230,7 @@ utf_8_codepoint(const unsigned char *c, size_t *width) {

        codepoint = (state != 0) ?
            (byte & 0x3fu) | (codepoint << 6) :
-            (0xff >> type) & (byte);
+            (0xffu >> type) & (byte);

        state = utf_8_dfa[256 + (state * 16) + type];
        if (!state) {
@ -2312,5 +2312,6 @@ yp_encoding_t yp_encoding_utf_8 = {
    .char_width = yp_encoding_utf_8_char_width,
    .alnum_char = yp_encoding_utf_8_alnum_char,
    .alpha_char = yp_encoding_utf_8_alpha_char,
-    .isupper_char = yp_encoding_utf_8_isupper_char
+    .isupper_char = yp_encoding_utf_8_isupper_char,
+    .multibyte = true
 };
--- a/yarp/enc/yp_windows_1251.c
+++ b/yarp/enc/yp_windows_1251.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1251 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_windows_1251_alnum_char,
    .alpha_char = yp_encoding_windows_1251_alpha_char,
-    .isupper_char = yp_encoding_windows_1251_isupper_char
+    .isupper_char = yp_encoding_windows_1251_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_windows_1252.c
+++ b/yarp/enc/yp_windows_1252.c
@ -45,5 +45,6 @@ yp_encoding_t yp_encoding_windows_1252 = {
    .char_width = yp_encoding_single_char_width,
    .alnum_char = yp_encoding_windows_1252_alnum_char,
    .alpha_char = yp_encoding_windows_1252_alpha_char,
-    .isupper_char = yp_encoding_windows_1252_isupper_char
+    .isupper_char = yp_encoding_windows_1252_isupper_char,
+    .multibyte = false
 };
--- a/yarp/enc/yp_windows_31j.c
+++ b/yarp/enc/yp_windows_31j.c
@ -77,5 +77,6 @@ yp_encoding_t yp_encoding_windows_31j = {
    .char_width = yp_encoding_windows_31j_char_width,
    .alnum_char = yp_encoding_windows_31j_alnum_char,
    .alpha_char = yp_encoding_windows_31j_alpha_char,
-    .isupper_char = yp_encoding_windows_31j_isupper_char
+    .isupper_char = yp_encoding_windows_31j_isupper_char,
+    .multibyte = true
 };
--- a/yarp/extension.c
+++ b/yarp/extension.c
@ -1,6 +1,7 @@
 #include "yarp/extension.h"

 VALUE rb_cYARP;
+VALUE rb_cYARPSource;
 VALUE rb_cYARPToken;
 VALUE rb_cYARPLocation;

@ -9,51 +10,97 @@ VALUE rb_cYARPParseError;
 VALUE rb_cYARPParseWarning;
 VALUE rb_cYARPParseResult;

-// Represents a source of Ruby code. It can either be coming from a file or a
-// string. If it's a file, it's going to mmap the contents of the file. If it's
-// a string it's going to just point to the contents of the string.
+/******************************************************************************/
+/* IO of Ruby code                                                            */
+/******************************************************************************/
+
+// Represents an input of Ruby code. It can either be coming from a file or a
+// string. If it's a file, we'll use demand paging to read the contents of the
+// file into a string. If it's already a string, we'll reference it directly.
 typedef struct {
-    enum { SOURCE_FILE, SOURCE_STRING } type;
    const char *source;
    size_t size;
-} source_t;
+} input_t;
+
+// Check if the given filepath is a string. If it's nil, then return NULL. If
+// it's not a string, then raise a type error. Otherwise return the filepath as
+// a C string.
+static const char *
+check_filepath(VALUE filepath) {
+    // If the filepath is nil, then we don't need to do anything.
+    if (NIL_P(filepath)) {
+        return NULL;
+    }
+
+    // Check if the filepath is a string. If it's not, then raise a type error.
+    if (!RB_TYPE_P(filepath, T_STRING)) {
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
+    }
+
+    // Otherwise, return the filepath as a C string.
+    return StringValueCStr(filepath);
+}

 // Read the file indicated by the filepath parameter into source and load its
-// contents and size into the given source_t.
+// contents and size into the given input_t.
+//
+// We want to use demand paging as much as possible in order to avoid having to
+// read the entire file into memory (which could be detrimental to performance
+// for large files). This means that if we're on windows we'll use
+// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+// `mmap`, and on other POSIX systems we'll use `read`.
 static int
-source_file_load(source_t *source, VALUE filepath) {
+input_load_filepath(input_t *input, const char *filepath) {
 #ifdef _WIN32
-    HANDLE file = CreateFile(
-        StringValueCStr(filepath),
-        GENERIC_READ,
-        0,
-        NULL,
-        OPEN_EXISTING,
-        FILE_ATTRIBUTE_NORMAL,
-        NULL
-    );
+    // Open the file for reading.
+    HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+
    if (file == INVALID_HANDLE_VALUE) {
-        perror("Invalid handle for file");
+        perror("CreateFile failed");
        return 1;
    }

+    // Get the file size.
    DWORD file_size = GetFileSize(file, NULL);
-    source->source = malloc(file_size);
+    if (file_size == INVALID_FILE_SIZE) {
+        CloseHandle(file);
+        perror("GetFileSize failed");
+        return 1;
+    }

-    DWORD bytes_read;
-    BOOL success = ReadFile(file, DISCARD_CONST_QUAL(void *, source->source), file_size, &bytes_read, NULL);
+    // If the file is empty, then we don't need to do anything else, we'll set
+    // the source to a constant empty string and return.
+    if (!file_size) {
+        CloseHandle(file);
+        input->size = 0;
+        input->source = "";
+        return 0;
+    }
+
+    // Create a mapping of the file.
+    HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (mapping == NULL) {
+        CloseHandle(file);
+        perror("CreateFileMapping failed");
+        return 1;
+    }
+
+    // Map the file into memory.
+    input->source = (const char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+    CloseHandle(mapping);
    CloseHandle(file);

-    if (!success) {
-        perror("ReadFile failed");
+    if (input->source == NULL) {
+        perror("MapViewOfFile failed");
        return 1;
    }

-    source->size = (size_t) file_size;
+    // Set the size of the source.
+    input->size = (size_t) file_size;
    return 0;
 #else
    // Open the file for reading
-    int fd = open(StringValueCStr(filepath), O_RDONLY);
+    int fd = open(filepath, O_RDONLY);
    if (fd == -1) {
        perror("open");
        return 1;
@ -68,30 +115,30 @@ source_file_load(source_t *source, VALUE filepath) {
    }

    // mmap the file descriptor to virtually get the contents
-    source->size = sb.st_size;
+    input->size = sb.st_size;

 #ifdef HAVE_MMAP
-    if (!source->size) {
+    if (!input->size) {
        close(fd);
-        source->source = "";
+        input->source = "";
        return 0;
    }

-    char * res = mmap(NULL, source->size, PROT_READ, MAP_PRIVATE, fd, 0);
-    if (res == MAP_FAILED) {
+    const char *result = mmap(NULL, input->size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (result == MAP_FAILED) {
        perror("Map failed");
        return 1;
    } else {
-        source->source = res;
+        input->source = result;
    }
 #else
-    source->source = malloc(source->size);
-    if (source->source == NULL) return 1;
+    input->source = malloc(input->size);
+    if (input->source == NULL) return 1;

-    ssize_t read_size = read(fd, (void *)source->source, source->size);
-    if (read_size < 0 || (size_t)read_size != source->size) {
+    ssize_t read_size = read(fd, (void *) input->source, input->size);
+    if (read_size < 0 || (size_t)read_size != input->size) {
        perror("Read size is incorrect");
-        free((void *)source->source);
+        free((void *) input->source);
        return 1;
    }
 #endif
@ -101,86 +148,106 @@ source_file_load(source_t *source, VALUE filepath) {
 #endif
 }

-// Load the contents and size of the given string into the given source_t.
+// Load the contents and size of the given string into the given input_t.
 static void
-source_string_load(source_t *source, VALUE string) {
-    *source = (source_t) {
-        .type = SOURCE_STRING,
-        .source = RSTRING_PTR(string),
-        .size = RSTRING_LEN(string),
-    };
+input_load_string(input_t *input, VALUE string) {
+    // Check if the string is a string. If it's not, then raise a type error.
+    if (!RB_TYPE_P(string, T_STRING)) {
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
+    }
+
+    input->source = RSTRING_PTR(string);
+    input->size = RSTRING_LEN(string);
 }

-// Free any resources associated with the given source_t.
+// Free any resources associated with the given input_t. This is the counterpart
+// to input_load_filepath. It will unmap the file if it was mapped, or free the
+// memory if it was allocated.
 static void
-source_file_unload(source_t *source) {
-#ifdef _WIN32
-    free((void *)source->source);
+input_unload_filepath(input_t *input) {
+    // We don't need to free anything with 0 sized files because we handle that
+    // with a constant string instead.
+    if (!input->size) return;
+    void *memory = (void *) input->source;
+
+#if defined(_WIN32)
+    UnmapViewOfFile(memory);
+#elif defined(HAVE_MMAP)
+    munmap(memory, input->size);
 #else
-#ifdef HAVE_MMAP
-    munmap((void *)source->source, source->size);
-#else
-    free((void *)source->source);
-#endif
+    free(memory);
 #endif
 }

-// Dump the AST corresponding to the given source to a string.
+/******************************************************************************/
+/* Serializing the AST                                                        */
+/******************************************************************************/
+
+// Dump the AST corresponding to the given input to a string.
 static VALUE
-dump_source(source_t *source, const char *filepath) {
+dump_input(input_t *input, const char *filepath) {
+    // Raising here happens before the parser is initialized, so there is no
+    // parser state to clean up on this path.
+    yp_buffer_t buffer;
+    if (!yp_buffer_init(&buffer)) {
+        rb_raise(rb_eNoMemError, "failed to allocate memory");
+    }
+
     yp_parser_t parser;
-    yp_parser_init(&parser, source->source, source->size, filepath);
+    yp_parser_init(&parser, input->source, input->size, filepath);

     yp_node_t *node = yp_parse(&parser);
-
-    yp_buffer_t buffer;
-    if (!yp_buffer_init(&buffer)) rb_raise(rb_eNoMemError, "failed to allocate memory");
-
     yp_serialize(&parser, node, &buffer);
-    VALUE dumped = rb_str_new(buffer.value, buffer.length);

+    // Build the Ruby string from the serialized bytes before the buffer, node,
+    // and parser are released below.
+    VALUE result = rb_str_new(buffer.value, buffer.length);
     yp_node_destroy(&parser, node);
     yp_buffer_free(&buffer);
     yp_parser_free(&parser);

-    return dumped;
+    return result;
 }

 // Dump the AST corresponding to the given string to a string.
 static VALUE
-dump(VALUE self, VALUE string, VALUE filepath) {
-    source_t source;
-    source_string_load(&source, string);
-    char *str = NULL;
+dump(int argc, VALUE *argv, VALUE self) {
+    VALUE string;
+    VALUE filepath;
+    rb_scan_args(argc, argv, "11", &string, &filepath);

-    if (filepath != Qnil) {
-        str = StringValueCStr(filepath);
-    }
-
-    return dump_source(&source, str);
+    input_t input;
+    input_load_string(&input, string);
+    return dump_input(&input, check_filepath(filepath));
 }

 // Dump the AST corresponding to the given file to a string.
 static VALUE
 dump_file(VALUE self, VALUE filepath) {
-    source_t source;
-    if (source_file_load(&source, filepath) != 0) return Qnil;
+    input_t input;
+
+    const char *checked = check_filepath(filepath);
+    if (input_load_filepath(&input, checked) != 0) return Qnil;
+
+    VALUE value = dump_input(&input, checked);
+    input_unload_filepath(&input);

-    VALUE value = dump_source(&source, StringValueCStr(filepath));
-    source_file_unload(&source);
    return value;
 }

+/******************************************************************************/
+/* Extracting values for the parse result                                     */
+/******************************************************************************/
+
 // Extract the comments out of the parser into an array.
 static VALUE
-parser_comments(yp_parser_t *parser) {
+parser_comments(yp_parser_t *parser, VALUE source) {
    VALUE comments = rb_ary_new();
-    yp_comment_t *comment;

-    for (comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
-        VALUE location_argv[] = { LONG2FIX(comment->start - parser->start), LONG2FIX(comment->end - parser->start) };
+    for (yp_comment_t *comment = (yp_comment_t *) parser->comment_list.head; comment != NULL; comment = (yp_comment_t *) comment->node.next) {
+        VALUE location_argv[] = {
+            source,
+            LONG2FIX(comment->start - parser->start),
+            LONG2FIX(comment->end - parser->start)
+        };
+
        VALUE type;
-
        switch (comment->type) {
            case YP_COMMENT_INLINE:
                type = ID2SYM(rb_intern("inline"));
@ -196,7 +263,7 @@ parser_comments(yp_parser_t *parser) {
                break;
        }

-        VALUE comment_argv[] = { type, rb_class_new_instance(2, location_argv, rb_cYARPLocation) };
+        VALUE comment_argv[] = { type, rb_class_new_instance(3, location_argv, rb_cYARPLocation) };
        rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment));
    }

@ -205,19 +272,20 @@ parser_comments(yp_parser_t *parser) {

 // Extract the errors out of the parser into an array.
 static VALUE
-parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
+parser_errors(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
    VALUE errors = rb_ary_new();
    yp_diagnostic_t *error;

    for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) {
        VALUE location_argv[] = {
+            source,
            LONG2FIX(error->start - parser->start),
            LONG2FIX(error->end - parser->start)
        };

        VALUE error_argv[] = {
            rb_enc_str_new_cstr(error->message, encoding),
-            rb_class_new_instance(2, location_argv, rb_cYARPLocation)
+            rb_class_new_instance(3, location_argv, rb_cYARPLocation)
        };

        rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError));
@ -228,19 +296,20 @@ parser_errors(yp_parser_t *parser, rb_encoding *encoding) {

 // Extract the warnings out of the parser into an array.
 static VALUE
-parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
+parser_warnings(yp_parser_t *parser, rb_encoding *encoding, VALUE source) {
    VALUE warnings = rb_ary_new();
    yp_diagnostic_t *warning;

    for (warning = (yp_diagnostic_t *) parser->warning_list.head; warning != NULL; warning = (yp_diagnostic_t *) warning->node.next) {
        VALUE location_argv[] = {
+            source,
            LONG2FIX(warning->start - parser->start),
            LONG2FIX(warning->end - parser->start)
        };

        VALUE warning_argv[] = {
            rb_enc_str_new_cstr(warning->message, encoding),
-            rb_class_new_instance(2, location_argv, rb_cYARPLocation)
+            rb_class_new_instance(3, location_argv, rb_cYARPLocation)
        };

        rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning));
@ -249,22 +318,36 @@ parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
    return warnings;
 }

+/******************************************************************************/
+/* Lexing Ruby code                                                           */
+/******************************************************************************/
+
+// This struct gets stored in the parser and passed in to the lex callback any
+// time a new token is found. We use it to store the necessary information to
+// initialize a Token instance.
 typedef struct {
+    // The Source object that is handed to every token that gets created.
+    VALUE source;
+    // The array that collects one [token, lex_state] pair per lexed token.
     VALUE tokens;
+    // The encoding that is passed along when creating each token.
     rb_encoding *encoding;
 } lex_data_t;

+// This is passed as a callback to the parser. It gets called every time a new
+// token is found. Once found, we initialize a new instance of Token and push
+// it, paired with the parser's current lex state, onto the tokens array.
+//
+// The data argument is not read here; the lex_data_t is fetched through
+// parser->lex_callback->data instead.
 static void
 lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
     lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;

     VALUE yields = rb_ary_new_capa(2);
-    rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding));
+    rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
     rb_ary_push(yields, INT2FIX(parser->lex_state));

     rb_ary_push(lex_data->tokens, yields);
 }

+// This is called whenever the encoding changes based on the magic comment at
+// the top of the file. We use it to update the encoding that we are using to
+// create tokens.
 static void
 lex_encoding_changed_callback(yp_parser_t *parser) {
    lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
@ -273,30 +356,42 @@ lex_encoding_changed_callback(yp_parser_t *parser) {

 // Return an array of tokens corresponding to the given source.
 static VALUE
-lex_source(source_t *source, char *filepath) {
+lex_input(input_t *input, const char *filepath) {
    yp_parser_t parser;
-    yp_parser_init(&parser, source->source, source->size, filepath);
+    yp_parser_init(&parser, input->source, input->size, filepath);
    yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);

+    VALUE offsets = rb_ary_new();
+    VALUE source_argv[] = { rb_str_new(input->source, input->size), offsets };
+    VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
+
    lex_data_t lex_data = {
+        .source = source,
        .tokens = rb_ary_new(),
        .encoding = rb_utf8_encoding()
    };

-    void *data = (void *) &lex_data;
+    lex_data_t *data = &lex_data;
    yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
-        .data = data,
+        .data = (void *) data,
        .callback = lex_token,
    };

    parser.lex_callback = &lex_callback;
    yp_node_t *node = yp_parse(&parser);

+    // Here we need to update the source range to have the correct newline
+    // offsets. We do it here because we've already created the object and given
+    // it over to all of the tokens.
+    for (size_t index = 0; index < parser.newline_list.size; index++) {
+        rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
+    }
+
    VALUE result_argv[] = {
        lex_data.tokens,
-        parser_comments(&parser),
-        parser_errors(&parser, lex_data.encoding),
-        parser_warnings(&parser, lex_data.encoding)
+        parser_comments(&parser, source),
+        parser_errors(&parser, lex_data.encoding, source),
+        parser_warnings(&parser, lex_data.encoding, source)
    };

    VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
@ -309,40 +404,49 @@ lex_source(source_t *source, char *filepath) {

 // Return an array of tokens corresponding to the given string.
 static VALUE
-lex(VALUE self, VALUE string, VALUE filepath) {
-    source_t source;
-    source_string_load(&source, string);
-    char *filepath_char = NULL;
-    if (filepath) {
-        filepath_char = StringValueCStr(filepath);
-    }
-    return lex_source(&source, filepath_char);
+lex(int argc, VALUE *argv, VALUE self) {
+    VALUE string;
+    VALUE filepath;
+    rb_scan_args(argc, argv, "11", &string, &filepath);
+
+    input_t input;
+    input_load_string(&input, string);
+    return lex_input(&input, check_filepath(filepath));
 }

 // Return an array of tokens corresponding to the given file.
 static VALUE
 lex_file(VALUE self, VALUE filepath) {
-    source_t source;
-    if (source_file_load(&source, filepath) != 0) return Qnil;
+    input_t input;
+
+    const char *checked = check_filepath(filepath);
+    if (input_load_filepath(&input, checked) != 0) return Qnil;
+
+    VALUE value = lex_input(&input, checked);
+    input_unload_filepath(&input);

-    VALUE value = lex_source(&source, StringValueCStr(filepath));
-    source_file_unload(&source);
    return value;
 }

+/******************************************************************************/
+/* Parsing Ruby code                                                          */
+/******************************************************************************/
+
+// Parse the given input and return a ParseResult instance.
 static VALUE
-parse_source(source_t *source, char *filepath) {
+parse_input(input_t *input, const char *filepath) {
    yp_parser_t parser;
-    yp_parser_init(&parser, source->source, source->size, filepath);
+    yp_parser_init(&parser, input->source, input->size, filepath);

    yp_node_t *node = yp_parse(&parser);
    rb_encoding *encoding = rb_enc_find(parser.encoding.name);

+    VALUE source = yp_source_new(&parser);
    VALUE result_argv[] = {
        yp_ast_new(&parser, node, encoding),
-        parser_comments(&parser),
-        parser_errors(&parser, encoding),
-        parser_warnings(&parser, encoding)
+        parser_comments(&parser, source),
+        parser_errors(&parser, encoding, source),
+        parser_warnings(&parser, encoding, source)
    };

    VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
@ -353,40 +457,58 @@ parse_source(source_t *source, char *filepath) {
    return result;
 }

+// Parse the given string and return a ParseResult instance.
 static VALUE
-parse(VALUE self, VALUE string, VALUE filepath) {
-    source_t source;
-    source_string_load(&source, string);
+parse(int argc, VALUE *argv, VALUE self) {
+    VALUE string;
+    VALUE filepath;
+    rb_scan_args(argc, argv, "11", &string, &filepath);
+
+    input_t input;
+    input_load_string(&input, string);
+
 #ifdef YARP_DEBUG_MODE_BUILD
-    char* dup = malloc(source.size);
-    memcpy(dup, source.source, source.size);
-    source.source = dup;
+    char* dup = malloc(input.size);
+    memcpy(dup, input.source, input.size);
+    input.source = dup;
 #endif
-    VALUE value = parse_source(&source, NIL_P(filepath) ? NULL : StringValueCStr(filepath));
+
+    VALUE value = parse_input(&input, check_filepath(filepath));
+
 #ifdef YARP_DEBUG_MODE_BUILD
    free(dup);
 #endif
+
    return value;
 }

+// Parse the given file and return a ParseResult instance.
 static VALUE
-parse_file(VALUE self, VALUE rb_filepath) {
-    source_t source;
-    if (source_file_load(&source, rb_filepath) != 0) {
-        return Qnil;
-    }
+parse_file(VALUE self, VALUE filepath) {
+    input_t input;
+
+    const char *checked = check_filepath(filepath);
+    if (input_load_filepath(&input, checked) != 0) return Qnil;
+
+    VALUE value = parse_input(&input, checked);
+    input_unload_filepath(&input);

-    VALUE value = parse_source(&source, StringValueCStr(rb_filepath));
-    source_file_unload(&source);
    return value;
 }

+/******************************************************************************/
+/* Utility functions exposed to make testing easier                           */
+/******************************************************************************/
+
+// Returns an array of strings corresponding to the named capture groups in the
+// given source string. If YARP was unable to parse the regular expression, this
+// function returns nil.
 static VALUE
-named_captures(VALUE self, VALUE rb_source) {
+named_captures(VALUE self, VALUE source) {
    yp_string_list_t string_list;
    yp_string_list_init(&string_list);

-    if (!yp_regexp_named_capture_group_names(RSTRING_PTR(rb_source), RSTRING_LEN(rb_source), &string_list)) {
+    if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) {
        yp_string_list_free(&string_list);
        return Qnil;
    }
@ -401,6 +523,8 @@ named_captures(VALUE self, VALUE rb_source) {
    return names;
 }

+// Accepts a source string and a type of unescaping and returns the unescaped
+// version.
 static VALUE
 unescape(VALUE source, yp_unescape_type_t unescape_type) {
    yp_string_t string;
@ -409,7 +533,13 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {
    yp_list_t error_list;
    yp_list_init(&error_list);

-    yp_unescape_manipulate_string(RSTRING_PTR(source), RSTRING_LEN(source), &string, unescape_type, &error_list);
+    const char *start = RSTRING_PTR(source);
+    size_t length = RSTRING_LEN(source);
+
+    yp_parser_t parser;
+    yp_parser_init(&parser, start, length, "");
+
+    yp_unescape_manipulate_string(&parser, start, length, &string, unescape_type, &error_list);
    if (yp_list_empty_p(&error_list)) {
        result = rb_str_new(yp_string_source(&string), yp_string_length(&string));
    } else {
@ -418,27 +548,32 @@ unescape(VALUE source, yp_unescape_type_t unescape_type) {

    yp_string_free(&string);
    yp_list_free(&error_list);
+    yp_parser_free(&parser);

    return result;
 }

+// Do not unescape anything in the given string. This is here to provide a
+// consistent API.
 static VALUE
 unescape_none(VALUE self, VALUE source) {
    return unescape(source, YP_UNESCAPE_NONE);
 }

+// Minimally unescape the given string. This means effectively unescaping just
+// the quotes of a string. Returns the unescaped string.
 static VALUE
 unescape_minimal(VALUE self, VALUE source) {
    return unescape(source, YP_UNESCAPE_MINIMAL);
 }

+// Unescape everything in the given string. Return the unescaped string.
 static VALUE
 unescape_all(VALUE self, VALUE source) {
    return unescape(source, YP_UNESCAPE_ALL);
 }

-// This function returns a hash of information about the given source string's
-// memory usage.
+// Return a hash of information about the given source string's memory usage.
 static VALUE
 memsize(VALUE self, VALUE string) {
    yp_parser_t parser;
@ -459,28 +594,17 @@ memsize(VALUE self, VALUE string) {
    return result;
 }

-static VALUE
-compile(VALUE self, VALUE string) {
-    yp_parser_t parser;
-    size_t length = RSTRING_LEN(string);
-    yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
-
-    yp_node_t *node = yp_parse(&parser);
-    VALUE result = yp_compile(node);
-
-    yp_node_destroy(&parser, node);
-    yp_parser_free(&parser);
-
-    return result;
-}
-
+// Parse the file, but do nothing with the result. This is used to profile the
+// parser for memory and speed.
 static VALUE
 profile_file(VALUE self, VALUE filepath) {
-    source_t source;
-    if (source_file_load(&source, filepath) != 0) return Qnil;
+    input_t input;
+
+    const char *checked = check_filepath(filepath);
+    if (input_load_filepath(&input, checked) != 0) return Qnil;

    yp_parser_t parser;
-    yp_parser_init(&parser, source.source, source.size, StringValueCStr(filepath));
+    yp_parser_init(&parser, input.source, input.size, checked);

    yp_node_t *node = yp_parse(&parser);
    yp_node_destroy(&parser, node);
@ -491,9 +615,8 @@ profile_file(VALUE self, VALUE filepath) {

 // The function takes a source string and returns a Ruby array containing the
 // offsets of every newline in the string. (It also includes a 0 at the
-// beginning to indicate the position of the first line.)
-//
-// It accepts a string as its only argument and returns an array of integers.
+// beginning to indicate the position of the first line.) It accepts a string as
+// its only argument and returns an array of integers.
 static VALUE
 newlines(VALUE self, VALUE string) {
    yp_parser_t parser;
@ -512,46 +635,56 @@ newlines(VALUE self, VALUE string) {
    return result;
 }

+/******************************************************************************/
+/* Initialization of the extension                                            */
+/******************************************************************************/
+
 RUBY_FUNC_EXPORTED void
 Init_yarp(void) {
+    // Make sure that the YARP library version matches the expected version.
+    // Otherwise something was compiled incorrectly.
    if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) {
-        rb_raise(rb_eRuntimeError, "The YARP library version (%s) does not match the expected version (%s)", yp_version(),
-                         EXPECTED_YARP_VERSION);
+        rb_raise(
+            rb_eRuntimeError,
+            "The YARP library version (%s) does not match the expected version (%s)",
+            yp_version(),
+            EXPECTED_YARP_VERSION
+        );
    }

+    // Grab references to all of the constants that we're going to need to
+    // reference throughout this extension.
    rb_cYARP = rb_define_module("YARP");
+    rb_cYARPSource = rb_define_class_under(rb_cYARP, "Source", rb_cObject);
    rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject);
    rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject);
-
    rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject);
    rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject);
    rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject);
    rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject);

-    rb_define_const(rb_cYARP, "VERSION", rb_sprintf("%d.%d.%d", YP_VERSION_MAJOR, YP_VERSION_MINOR, YP_VERSION_PATCH));
+    // Define the version string from EXPECTED_YARP_VERSION, the same macro that
+    // the version check above compares against yp_version().
+    rb_define_const(rb_cYARP, "VERSION", rb_str_new2(EXPECTED_YARP_VERSION));

-    rb_define_singleton_method(rb_cYARP, "dump", dump, 2);
+    // First, the functions that have to do with lexing and parsing.
+    rb_define_singleton_method(rb_cYARP, "dump", dump, -1);
    rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
-
-    rb_define_singleton_method(rb_cYARP, "lex", lex, 2);
+    rb_define_singleton_method(rb_cYARP, "lex", lex, -1);
    rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
-
-    rb_define_singleton_method(rb_cYARP, "_parse", parse, 2);
+    rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
    rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);

+    // Next, the functions that will be called by the parser to perform various
+    // internal tasks. We expose these to make them easier to test.
    rb_define_singleton_method(rb_cYARP, "named_captures", named_captures, 1);
-
    rb_define_singleton_method(rb_cYARP, "unescape_none", unescape_none, 1);
    rb_define_singleton_method(rb_cYARP, "unescape_minimal", unescape_minimal, 1);
    rb_define_singleton_method(rb_cYARP, "unescape_all", unescape_all, 1);
-
    rb_define_singleton_method(rb_cYARP, "memsize", memsize, 1);
-
-    rb_define_singleton_method(rb_cYARP, "compile", compile, 1);
-
    rb_define_singleton_method(rb_cYARP, "profile_file", profile_file, 1);
-
    rb_define_singleton_method(rb_cYARP, "newlines", newlines, 1);

+    // Next, initialize the pack API.
    Init_yarp_pack();
 }
--- a/yarp/extension.h
+++ b/yarp/extension.h
@ -5,11 +5,11 @@
 #include <ruby/encoding.h>
 #include "yarp.h"

-#include <fcntl.h>
-
+// The following headers are necessary to read files using demand paging.
 #ifdef _WIN32
 #include <windows.h>
 #else
+#include <fcntl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <unistd.h>
@ -17,16 +17,11 @@

 #define EXPECTED_YARP_VERSION "0.4.0"

-VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding);
-
+VALUE yp_source_new(yp_parser_t *parser);
+VALUE yp_token_new(yp_parser_t *parser, yp_token_t *token, rb_encoding *encoding, VALUE source);
 VALUE yp_ast_new(yp_parser_t *parser, yp_node_t *node, rb_encoding *encoding);

-VALUE yp_compile(yp_node_t *node);
-
 void Init_yarp_pack(void);
-
 YP_EXPORTED_FUNCTION void Init_yarp(void);

-#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
-
-#endif // YARP_EXT_NODE_H
+#endif
--- a/yarp/missing.h
+++ b/yarp/missing.h
@ -1,20 +0,0 @@
-#ifndef YARP_MISSING_H
-#define YARP_MISSING_H
-
-#include "yarp/defines.h"
-
-#include <ctype.h>
-#include <stddef.h>
-#include <string.h>
-
-const char * yp_strnstr(const char *haystack, const char *needle, size_t length);
-
-int yp_strncasecmp(const char *string1, const char *string2, size_t length);
-
-#ifndef HAVE_STRNCASECMP
-#ifndef strncasecmp
-#define strncasecmp yp_strncasecmp
-#endif
-#endif
-
-#endif
--- a/yarp/node.h
+++ b/yarp/node.h
@ -2,8 +2,6 @@
 #define YARP_NODE_H

 #include "yarp/defines.h"
-
-#include "yarp.h"
 #include "yarp/parser.h"

 // Append a token to the given list.
@ -15,6 +13,20 @@ void yp_node_list_append(yp_node_list_t *list, yp_node_t *node);
 // Clear the node but preserves the location.
 void yp_node_clear(yp_node_t *node);

+// Deallocate a node and all of its children.
+YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
+
+// This struct stores the information gathered by the yp_node_memsize function.
+// It contains both the memory footprint and additional metadata about the
+// shape of the tree.
+typedef struct {
+    size_t memsize;
+    size_t node_count;
+} yp_memsize_t;
+
+// Calculates the memory footprint of a given node.
+YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
+
 #define YP_EMPTY_NODE_LIST ((yp_node_list_t) { .nodes = NULL, .size = 0, .capacity = 0 })
 #define YP_EMPTY_LOCATION_LIST ((yp_location_list_t) { .locations = NULL, .size = 0, .capacity = 0 })

--- a/yarp/pack.h
+++ b/yarp/pack.h
@ -3,8 +3,8 @@

 #include "yarp/defines.h"

-#include <stdlib.h>
 #include <stdint.h>
+#include <stdlib.h>

 typedef enum yp_pack_version {
    YP_PACK_VERSION_3_2_0
--- a/yarp/parser.h
+++ b/yarp/parser.h
@ -1,17 +1,16 @@
 #ifndef YARP_PARSER_H
 #define YARP_PARSER_H

-#include "yarp/defines.h"
-
-#include <stdbool.h>
-
 #include "yarp/ast.h"
+#include "yarp/defines.h"
 #include "yarp/enc/yp_encoding.h"
 #include "yarp/util/yp_constant_pool.h"
 #include "yarp/util/yp_list.h"
 #include "yarp/util/yp_newline_list.h"
 #include "yarp/util/yp_state_stack.h"

+#include <stdbool.h>
+
 // This enum provides various bits that represent different kinds of states that
 // the lexer can track. This is used to determine which kind of token to return
 // based on the context of the parser.
--- a/yarp/prettyprint.c
+++ b/yarp/prettyprint.c
@ -5,6 +5,8 @@
 /* if you are looking to modify the                                           */
 /* template                                                                   */
 /******************************************************************************/
+#include "yarp/defines.h"
+
 #include <stdio.h>

 #include "yarp/ast.h"
@ -14,7 +16,7 @@
 static void
 prettyprint_location(yp_buffer_t *buffer, yp_parser_t *parser, yp_location_t *location) {
    char printed[] = "[0000-0000]";
-    sprintf(printed, "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
+    yp_snprintf(printed, sizeof(printed), "[%04ld-%04ld]", (long int)(location->start - parser->start), (long int)(location->end - parser->start));
    yp_buffer_append_str(buffer, printed, strlen(printed));
 }

@ -189,7 +191,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_block_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_block_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_block_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            if (((yp_block_node_t *)node)->parameters == NULL) {
@ -291,7 +293,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_node_t *)node)->block);
            }
            yp_buffer_append_str(buffer, ", ", 2);            char flags_buffer[12];
-            sprintf(flags_buffer, "+%d", ((yp_call_node_t *)node)->flags);
+            yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_call_node_t *)node)->flags);
            yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            yp_buffer_append_str(buffer, "\"", 1);
            yp_buffer_append_str(buffer, yp_string_source(&((yp_call_node_t *)node)->name), yp_string_length(&((yp_call_node_t *)node)->name));
@ -321,7 +323,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_call_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_call_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_id_buffer[12];
-            sprintf(operator_id_buffer, "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
+            yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_call_operator_write_node_t *)node)->operator_id);
            yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -360,7 +362,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_class_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_class_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_class_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_class_node_t *)node)->class_keyword_loc);
@ -406,7 +408,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_class_variable_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_class_variable_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_class_variable_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -454,7 +456,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_constant_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_constant_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -493,7 +495,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_constant_path_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_constant_path_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_constant_path_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -540,7 +542,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            for (uint32_t index = 0; index < ((yp_def_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_def_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_def_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_def_node_t *)node)->def_keyword_loc);
@ -734,7 +736,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_global_variable_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_global_variable_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_global_variable_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -871,7 +873,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_instance_variable_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_instance_variable_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char operator_buffer[12];
-            sprintf(operator_buffer, "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
+            yp_snprintf(operator_buffer, sizeof(operator_buffer), "%u", ((yp_instance_variable_operator_write_node_t *)node)->operator);
            yp_buffer_append_str(buffer, operator_buffer, strlen(operator_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -911,7 +913,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_interpolated_regular_expression_node_t *)node)->closing_loc);
            yp_buffer_append_str(buffer, ", ", 2);            char flags_buffer[12];
-            sprintf(flags_buffer, "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
+            yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_interpolated_regular_expression_node_t *)node)->flags);
            yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1001,7 +1003,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_lambda_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_lambda_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_lambda_node_t *)node)->opening_loc);
@ -1024,7 +1026,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_local_variable_operator_and_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_and_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_and_write_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1035,7 +1037,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_local_variable_operator_or_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_or_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_or_write_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1046,10 +1048,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_local_variable_operator_write_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_local_variable_operator_write_node_t *)node)->value);
            yp_buffer_append_str(buffer, ", ", 2);            char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            char operator_id_buffer[12];
-            sprintf(operator_id_buffer, "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
+            yp_snprintf(operator_id_buffer, sizeof(operator_id_buffer), "%u", ((yp_local_variable_operator_write_node_t *)node)->operator_id);
            yp_buffer_append_str(buffer, operator_id_buffer, strlen(operator_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1057,10 +1059,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
        case YP_NODE_LOCAL_VARIABLE_READ_NODE: {
            yp_buffer_append_str(buffer, "LocalVariableReadNode(", 22);
                        char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_read_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            char depth_buffer[12];
-            sprintf(depth_buffer, "+%d", ((yp_local_variable_read_node_t *)node)->depth);
+            yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_read_node_t *)node)->depth);
            yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1068,10 +1070,10 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
        case YP_NODE_LOCAL_VARIABLE_WRITE_NODE: {
            yp_buffer_append_str(buffer, "LocalVariableWriteNode(", 23);
                        char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_local_variable_write_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            char depth_buffer[12];
-            sprintf(depth_buffer, "+%d", ((yp_local_variable_write_node_t *)node)->depth);
+            yp_snprintf(depth_buffer, sizeof(depth_buffer), "+%d", ((yp_local_variable_write_node_t *)node)->depth);
            yp_buffer_append_str(buffer, depth_buffer, strlen(depth_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            if (((yp_local_variable_write_node_t *)node)->value == NULL) {
                yp_buffer_append_str(buffer, "nil", 3);
@ -1113,7 +1115,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_module_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_module_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_module_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_module_node_t *)node)->module_keyword_loc);
@ -1187,7 +1189,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
        case YP_NODE_OPTIONAL_PARAMETER_NODE: {
            yp_buffer_append_str(buffer, "OptionalParameterNode(", 22);
                        char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_optional_parameter_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->name_loc);
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_optional_parameter_node_t *)node)->operator_loc);
@ -1298,7 +1300,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_program_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_program_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_program_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_node(buffer, parser, (yp_node_t *)((yp_program_node_t *)node)->statements);
@ -1319,7 +1321,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_range_node_t *)node)->operator_loc);
            yp_buffer_append_str(buffer, ", ", 2);            char flags_buffer[12];
-            sprintf(flags_buffer, "+%d", ((yp_range_node_t *)node)->flags);
+            yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_range_node_t *)node)->flags);
            yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1344,7 +1346,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
            yp_buffer_append_str(buffer, yp_string_source(&((yp_regular_expression_node_t *)node)->unescaped), yp_string_length(&((yp_regular_expression_node_t *)node)->unescaped));
            yp_buffer_append_str(buffer, "\"", 1);
            yp_buffer_append_str(buffer, ", ", 2);            char flags_buffer[12];
-            sprintf(flags_buffer, "+%d", ((yp_regular_expression_node_t *)node)->flags);
+            yp_snprintf(flags_buffer, sizeof(flags_buffer), "+%d", ((yp_regular_expression_node_t *)node)->flags);
            yp_buffer_append_str(buffer, flags_buffer, strlen(flags_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1363,7 +1365,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
        case YP_NODE_REQUIRED_PARAMETER_NODE: {
            yp_buffer_append_str(buffer, "RequiredParameterNode(", 22);
                        char constant_id_buffer[12];
-            sprintf(constant_id_buffer, "%u", ((yp_required_parameter_node_t *)node)->constant_id);
+            yp_snprintf(constant_id_buffer, sizeof(constant_id_buffer), "%u", ((yp_required_parameter_node_t *)node)->constant_id);
            yp_buffer_append_str(buffer, constant_id_buffer, strlen(constant_id_buffer));
            yp_buffer_append_str(buffer, ")", 1);
            break;
@ -1443,7 +1445,7 @@ prettyprint_node(yp_buffer_t *buffer, yp_parser_t *parser, yp_node_t *node) {
                        for (uint32_t index = 0; index < ((yp_singleton_class_node_t *)node)->locals.size; index++) {
                if (index != 0) yp_buffer_append_str(buffer, ", ", 2);
                char locals_buffer[12];
-                sprintf(locals_buffer, "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
+                yp_snprintf(locals_buffer, sizeof(locals_buffer), "%u", ((yp_singleton_class_node_t *)node)->locals.ids[index]);
                yp_buffer_append_str(buffer, locals_buffer, strlen(locals_buffer));
            }
            yp_buffer_append_str(buffer, ", ", 2);            prettyprint_location(buffer, parser, &((yp_singleton_class_node_t *)node)->class_keyword_loc);
--- a/yarp/regexp.c
+++ b/yarp/regexp.c
@ -374,7 +374,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
            case '#': { // inline comments
                bool found = yp_regexp_char_find(parser, ')');
                // the close paren we found is escaped, we need to find another
-                while (parser->start <= parser->cursor - 2 && *(parser->cursor - 2) == '\\') {
+                while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
                    found = yp_regexp_char_find(parser, ')');
                }
                return found;
--- a/yarp/regexp.h
+++ b/yarp/regexp.h
@ -2,15 +2,14 @@
 #define YARP_REGEXP_H

 #include "yarp/defines.h"
-
 #include "yarp/parser.h"
+#include "yarp/util/yp_string_list.h"
+#include "yarp/util/yp_string.h"
+
 #include <stdbool.h>
 #include <stddef.h>
 #include <string.h>

-#include "yarp/util/yp_string_list.h"
-#include "yarp/util/yp_string.h"
-
 // Parse a regular expression and extract the names of all of the named capture
 // groups.
 YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures);
--- a/yarp/unescape.c
+++ b/yarp/unescape.c
@ -438,14 +438,14 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
 // \c? or \C-?    delete, ASCII 7Fh (DEL)
 //
 YP_EXPORTED_FUNCTION void
-yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
+yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
    if (unescape_type == YP_UNESCAPE_NONE) {
        // If we're not unescaping then we can reference the source directly.
        yp_string_shared_init(string, value, value + length);
        return;
    }

-    const char *backslash = memchr(value, '\\', length);
+    const char *backslash = yp_memchr(parser, value, '\\', length);

    if (backslash == NULL) {
        // Here there are no escapes, so we can reference the source directly.
@ -509,7 +509,7 @@ yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *str
        }

        if (end > cursor) {
-            backslash = memchr(cursor, '\\', (size_t) (end - cursor));
+            backslash = yp_memchr(parser, cursor, '\\', (size_t) (end - cursor));
        } else {
            backslash = NULL;
        }
--- a/yarp/unescape.h
+++ b/yarp/unescape.h
@ -2,17 +2,18 @@
 #define YARP_UNESCAPE_H

 #include "yarp/defines.h"
+#include "yarp/diagnostic.h"
+#include "yarp/parser.h"
+#include "yarp/util/yp_char.h"
+#include "yarp/util/yp_list.h"
+#include "yarp/util/yp_memchr.h"
+#include "yarp/util/yp_string.h"

 #include <assert.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <string.h>

-#include "yarp/diagnostic.h"
-#include "yarp/util/yp_char.h"
-#include "yarp/util/yp_list.h"
-#include "yarp/util/yp_string.h"
-
 // The type of unescape we are performing.
 typedef enum {
    // When we're creating a string inside of a list literal like %w, we
@ -30,7 +31,7 @@ typedef enum {

 // Unescape the contents of the given token into the given string using the
 // given unescape mode.
-YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
+YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);

 YP_EXPORTED_FUNCTION size_t yp_unescape_calculate_difference(const char *value, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list);

--- a/yarp/util/yp_char.h
+++ b/yarp/util/yp_char.h
@ -2,12 +2,11 @@
 #define YP_CHAR_H

 #include "yarp/defines.h"
+#include "yarp/util/yp_newline_list.h"

 #include <stdbool.h>
 #include <stddef.h>

-#include "yarp/util/yp_newline_list.h"
-
 // Returns the number of characters at the start of the string that are
 // whitespace. Disallows searching past the given maximum number of characters.
 size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);
--- a/yarp/util/yp_constant_pool.h
+++ b/yarp/util/yp_constant_pool.h
@ -6,13 +6,13 @@
 #ifndef YP_CONSTANT_POOL_H
 #define YP_CONSTANT_POOL_H

+#include "yarp/defines.h"
+
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>

-#include "yarp/defines.h"
-
 typedef uint32_t yp_constant_id_t;

 typedef struct {
--- a/yarp/util/yp_memchr.c
+++ b/yarp/util/yp_memchr.c
@ -0,0 +1,31 @@
+#include "yarp/util/yp_memchr.h"
+
+#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
+
+// We need to roll our own memchr to handle cases where the encoding changes and
+// we need to search for a character in a buffer that could be the trailing byte
+// of a multibyte character.
+void *
+yp_memchr(yp_parser_t *parser, const void *memory, int character, size_t number) {
+    if (parser->encoding_changed && parser->encoding.multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
+        const char *source = (const char *) memory;
+        size_t index = 0;
+
+        while (index < number) {
+            if (source[index] == character) {
+                return (void *) (source + index);
+            }
+
+            size_t width = parser->encoding.char_width(source + index);
+            if (width == 0) {
+                return NULL;
+            }
+
+            index += width;
+        }
+
+        return NULL;
+    } else {
+        return memchr(memory, character, number);
+    }
+}
--- a/yarp/util/yp_memchr.h
+++ b/yarp/util/yp_memchr.h
@ -0,0 +1,14 @@
+#ifndef YP_MEMCHR_H
+#define YP_MEMCHR_H
+
+#include "yarp/defines.h"
+#include "yarp/parser.h"
+
+#include <stddef.h>
+
+// We need to roll our own memchr to handle cases where the encoding changes and
+// we need to search for a character in a buffer that could be the trailing byte
+// of a multibyte character.
+void * yp_memchr(yp_parser_t *parser, const void *source, int character, size_t number);
+
+#endif
--- a/yarp/util/yp_newline_list.c
+++ b/yarp/util/yp_newline_list.c
@ -31,7 +31,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
    }

    assert(cursor >= list->start);
-    list->offsets[list->size++] = (size_t) (cursor - list->start);
+    list->offsets[list->size++] = (size_t) (cursor - list->start + 1);

    return true;
 }
--- a/yarp/util/yp_newline_list.h
+++ b/yarp/util/yp_newline_list.h
@ -9,13 +9,13 @@
 #ifndef YP_NEWLINE_LIST_H
 #define YP_NEWLINE_LIST_H

-#include <assert.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <stdlib.h>
-
 #include "yarp/defines.h"

+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+
 // A list of offsets of newlines in a string. The offsets are assumed to be
 // sorted/inserted in ascending order.
 typedef struct {
--- a/yarp/util/yp_snprintf.c
+++ b/yarp/util/yp_snprintf.c
@ -0,0 +1,14 @@
+#include "yarp/defines.h"
+
+#ifndef HAVE_SNPRINTF
+// In case snprintf isn't present on the system, we provide our own that simply
+// forwards to the less-safe vsprintf and ignores the size argument.
+int
+yp_snprintf(char *dest, YP_ATTRIBUTE_UNUSED size_t size, const char *format, ...) {
+    va_list args;
+    va_start(args, format);
+    int result = vsprintf(dest, format, args);
+    va_end(args);
+    return result;
+}
+#endif
--- a/yarp/util/yp_string.c
+++ b/yarp/util/yp_string.c
@ -1,11 +1,5 @@
 #include "yarp/util/yp_string.h"

-// Allocate a new yp_string_t.
-yp_string_t *
-yp_string_alloc(void) {
-    return (yp_string_t *) malloc(sizeof(yp_string_t));
-}
-
 // Initialize a shared string that is based on initial input.
 void
 yp_string_shared_init(yp_string_t *string, const char *start, const char *end) {
--- a/yarp/util/yp_string.h
+++ b/yarp/util/yp_string.h
@ -29,9 +29,6 @@ typedef struct {
    } as;
 } yp_string_t;

-// Allocate a new yp_string_t.
-yp_string_t * yp_string_alloc(void);
-
 // Initialize a shared string that is based on initial input.
 void yp_string_shared_init(yp_string_t *string, const char *start, const char *end);

--- a/yarp/util/yp_string_list.h
+++ b/yarp/util/yp_string_list.h
@ -2,12 +2,11 @@
 #define YARP_STRING_LIST_H

 #include "yarp/defines.h"
+#include "yarp/util/yp_string.h"

 #include <stddef.h>
 #include <stdlib.h>

-#include "yarp/util/yp_string.h"
-
 typedef struct {
    yp_string_t *strings;
    size_t length;
--- a/yarp/util/yp_strncasecmp.c
+++ b/yarp/util/yp_strncasecmp.c
@ -1,19 +1,5 @@
-#include "yarp/missing.h"
-
-const char *
-yp_strnstr(const char *haystack, const char *needle, size_t length) {
-    size_t needle_length = strlen(needle);
-    if (needle_length > length) return NULL;
-
-    const char *haystack_limit = haystack + length - needle_length + 1;
-
-    while ((haystack = memchr(haystack, needle[0], (size_t) (haystack_limit - haystack))) != NULL) {
-        if (!strncmp(haystack, needle, needle_length)) return haystack;
-        haystack++;
-    }
-
-    return NULL;
-}
+#include <ctype.h>
+#include <stddef.h>

 int
 yp_strncasecmp(const char *string1, const char *string2, size_t length) {
--- a/yarp/util/yp_strpbrk.c
+++ b/yarp/util/yp_strpbrk.c
@ -1,5 +1,42 @@
 #include "yarp/util/yp_strpbrk.h"

+// This is the slow path that does care about the encoding.
+static inline const char *
+yp_strpbrk_multi_byte(yp_parser_t *parser, const char *source, const char *charset, size_t maximum) {
+    size_t index = 0;
+
+    while (index < maximum) {
+        if (strchr(charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        size_t width = parser->encoding.char_width(source + index);
+        if (width == 0) {
+            return NULL;
+        }
+
+        index += width;
+    }
+
+    return NULL;
+}
+
+// This is the fast path that does not care about the encoding.
+static inline const char *
+yp_strpbrk_single_byte(const char *source, const char *charset, size_t maximum) {
+    size_t index = 0;
+
+    while (index < maximum) {
+        if (strchr(charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        index++;
+    }
+
+    return NULL;
+}
+
 // Here we have rolled our own version of strpbrk. The standard library strpbrk
 // has undefined behavior when the source string is not null-terminated. We want
 // to support strings that are not null-terminated because yp_parse does not
@ -12,19 +49,18 @@
 // also don't want it to stop on null bytes. Ruby actually allows null bytes
 // within strings, comments, regular expressions, etc. So we need to be able to
 // skip past them.
+//
+// Finally, we want to support encodings wherein the charset could contain
+// characters that are trailing bytes of multi-byte characters. For example, in
+// Shift-JIS, the backslash character can be a trailing byte. In that case we
+// need to take a slower path and iterate one multi-byte character at a time.
 const char *
-yp_strpbrk(const char *source, const char *charset, ptrdiff_t length) {
-    if (length < 0) return NULL;
-
-    size_t index = 0;
-    size_t maximum = (size_t) length;
-
-    while (index < maximum) {
-        if (strchr(charset, source[index]) != NULL) {
-            return &source[index];
-        }
-        index++;
+yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length) {
+    if (length <= 0) {
+        return NULL;
+    } else if (parser->encoding_changed && parser->encoding.multibyte) {
+        return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
+    } else {
+        return yp_strpbrk_single_byte(source, charset, (size_t) length);
    }
-
-    return NULL;
 }
--- a/yarp/util/yp_strpbrk.h
+++ b/yarp/util/yp_strpbrk.h
@ -2,6 +2,7 @@
 #define YP_STRPBRK_H

 #include "yarp/defines.h"
+#include "yarp/parser.h"

 #include <stddef.h>
 #include <string.h>
@ -18,6 +19,11 @@
 // also don't want it to stop on null bytes. Ruby actually allows null bytes
 // within strings, comments, regular expressions, etc. So we need to be able to
 // skip past them.
-const char * yp_strpbrk(const char *source, const char *charset, ptrdiff_t length);
+//
+// Finally, we want to support encodings wherein the charset could contain
+// characters that are trailing bytes of multi-byte characters. For example, in
+// Shift-JIS, the backslash character can be a trailing byte. In that case we
+// need to take a slower path and iterate one multi-byte character at a time.
+const char * yp_strpbrk(yp_parser_t *parser, const char *source, const char *charset, ptrdiff_t length);

 #endif
--- a/yarp/version.h
+++ b/yarp/version.h
@ -0,0 +1,5 @@
+#define YP_VERSION_MAJOR 0
+#define YP_VERSION_MINOR 4
+#define YP_VERSION_PATCH 0
+
+#define YP_VERSION "0.4.0"
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@ -1,16 +1,19 @@
 #include "yarp.h"
+#include "yarp/version.h"

-#define YP_STRINGIZE0(expr) #expr
-#define YP_STRINGIZE(expr) YP_STRINGIZE0(expr)
-#define YP_VERSION_MACRO YP_STRINGIZE(YP_VERSION_MAJOR) "." YP_STRINGIZE(YP_VERSION_MINOR) "." YP_STRINGIZE(YP_VERSION_PATCH)
-
-#define YP_TAB_WHITESPACE_SIZE 8
-
+// The YARP version and the serialization format.
 const char *
 yp_version(void) {
-    return YP_VERSION_MACRO;
+    return YP_VERSION;
 }

+// In heredocs, tabs automatically complete up to the next 8 spaces. This is
+// defined in CRuby as TAB_WIDTH.
+#define YP_TAB_WHITESPACE_SIZE 8
+
+// Debugging logging will provide you with additional debugging functions as
+// well as automatically replace some functions with their debugging
+// counterparts.
 #ifndef YP_DEBUG_LOGGING
 #define YP_DEBUG_LOGGING 0
 #endif
@ -442,6 +445,7 @@ not_provided(yp_parser_t *parser) {
    return (yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
 }

+#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_SHARED, .as.shared.start = NULL, .as.shared.end = NULL })
 #define YP_LOCATION_NULL_VALUE(parser) ((yp_location_t) { .start = parser->start, .end = parser->start })
 #define YP_LOCATION_TOKEN_VALUE(token) ((yp_location_t) { .start = (token)->start, .end = (token)->end })
 #define YP_LOCATION_NODE_VALUE(node) ((yp_location_t) { .start = (node)->location.start, .end = (node)->location.end })
@ -675,7 +679,9 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
        .constant = NULL,
        .rest = NULL,
        .requireds = YP_EMPTY_NODE_LIST,
-        .posts = YP_EMPTY_NODE_LIST
+        .posts = YP_EMPTY_NODE_LIST,
+        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };

    // For now we're going to just copy over each pointer manually. This could be
@ -684,7 +690,7 @@ yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *node
    for (size_t index = 0; index < nodes->size; index++) {
        yp_node_t *child = nodes->nodes[index];

-        if (child->type == YP_NODE_SPLAT_NODE) {
+        if (!found_rest && child->type == YP_NODE_SPLAT_NODE) {
            node->rest = child;
            found_rest = true;
        } else if (found_rest) {
@ -710,7 +716,9 @@ yp_array_pattern_node_rest_create(yp_parser_t *parser, yp_node_t *rest) {
        .constant = NULL,
        .rest = rest,
        .requireds = YP_EMPTY_NODE_LIST,
-        .posts = YP_EMPTY_NODE_LIST
+        .posts = YP_EMPTY_NODE_LIST,
+        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };

    return node;
@ -1885,7 +1893,9 @@ yp_find_pattern_node_create(yp_parser_t *parser, yp_node_list_t *nodes) {
        .constant = NULL,
        .left = left,
        .right = right,
-        .requireds = YP_EMPTY_NODE_LIST
+        .requireds = YP_EMPTY_NODE_LIST,
+        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };

    // For now we're going to just copy over each pointer manually. This could be
@ -2018,7 +2028,9 @@ yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assoc
        },
        .constant = NULL,
        .kwrest = NULL,
-        .assocs = YP_EMPTY_NODE_LIST
+        .assocs = YP_EMPTY_NODE_LIST,
+        .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
+        .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
    };

    for (size_t index = 0; index < assocs->size; index++) {
@ -3709,7 +3721,8 @@ yp_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .content_loc = YP_LOCATION_TOKEN_VALUE(content),
-        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
+        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
+        .unescaped = YP_EMPTY_STRING
    };

    return node;
@ -3766,7 +3779,8 @@ yp_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_t
        },
        .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
        .value_loc = YP_LOCATION_TOKEN_VALUE(value),
-        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing)
+        .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
+        .unescaped = YP_EMPTY_STRING
    };

    return node;
@ -3788,7 +3802,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
            ptrdiff_t length = label.end - label.start;
            assert(length >= 0);

-            yp_unescape_manipulate_string(label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
+            yp_unescape_manipulate_string(parser, label.start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
            break;
        }
        case YP_TOKEN_MISSING: {
@ -4073,7 +4087,8 @@ yp_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_
        },
        .opening_loc = YP_LOCATION_TOKEN_VALUE(opening),
        .content_loc = YP_LOCATION_TOKEN_VALUE(content),
-        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing)
+        .closing_loc = YP_LOCATION_TOKEN_VALUE(closing),
+        .unescaped = YP_EMPTY_STRING
    };

    return node;
@ -4113,6 +4128,7 @@ yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_lo
 }


+#undef YP_EMPTY_STRING
 #undef YP_LOCATION_NULL_VALUE
 #undef YP_LOCATION_TOKEN_VALUE
 #undef YP_LOCATION_NODE_VALUE
@ -4331,6 +4347,17 @@ peek(yp_parser_t *parser) {
    }
 }

+// Get the next string of length len in the source starting from parser->current.end.
+// If the string extends beyond the end of the source, return the empty string ""
+static inline const char*
+peek_string(yp_parser_t *parser, size_t len) {
+    if (parser->current.end + len <= parser->end) {
+        return parser->current.end;
+    } else {
+        return "";
+    }
+}
+
 // If the character to be read matches the given value, then returns true and
 // advanced the current pointer.
 static inline bool
@ -4342,22 +4369,53 @@ match(yp_parser_t *parser, char value) {
    return false;
 }

+// Find the next newline character in the given range (NULL if there is none).
+static inline const char *
+next_newline(const char *cursor, ptrdiff_t length) {
+    assert(length >= 0);
+
+    // Note that it's okay for us to use memchr here to look for \n because none
+    // of the encodings that we support have \n as a component of a multi-byte
+    // character.
+    return memchr(cursor, '\n', (size_t) length);
+}
+
+// Find the start of the encoding name in a magic comment, i.e. the byte right
+// after "coding:" or "coding=". This is effectively an inlined strnstr.
+static inline const char *
+parser_lex_encoding_comment_start(yp_parser_t *parser, const char *cursor, ptrdiff_t remaining) {
+    assert(remaining >= 0);
+    size_t length = (size_t) remaining;
+
+    size_t key_length = strlen("coding:");
+    if (key_length > length) return NULL;
+
+    const char *cursor_limit = cursor + length - key_length + 1;
+    while ((cursor = yp_memchr(parser, cursor, 'c', (size_t) (cursor_limit - cursor))) != NULL) {
+        if (
+            (strncmp(cursor, "coding", key_length - 1) == 0) &&
+            (cursor[key_length - 1] == ':' || cursor[key_length - 1] == '=')
+        ) {
+            return cursor + key_length;
+        }
+
+        cursor++;
+    }
+
+    return NULL;
+}
+
 // Here we're going to check if this is a "magic" comment, and perform whatever
 // actions are necessary for it here.
 static void
 parser_lex_encoding_comment(yp_parser_t *parser) {
    const char *start = parser->current.start + 1;
-    const char *end = memchr(start, '\n', (size_t) (parser->end - start));
+    const char *end = next_newline(start, parser->end - start);
    if (end == NULL) end = parser->end;

    // These are the patterns we're going to match to find the encoding comment.
    // This is definitely not complete or even really correct.
-    const char *encoding_start = NULL;
-    if ((encoding_start = yp_strnstr(start, "coding:", (size_t) (end - start))) != NULL) {
-        encoding_start += 7;
-    } else if ((encoding_start = yp_strnstr(start, "coding=", (size_t) (end - start))) != NULL) {
-        encoding_start += 7;
-    }
+    const char *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);

    // If we didn't find anything that matched our patterns, then return. Note
    // that this does a _very_ poor job of actually finding the encoding, and
@ -4370,7 +4428,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {

    // Now determine the end of the encoding string. This is either the end of
    // the line, the first whitespace character, or a punctuation mark.
-    const char *encoding_end = yp_strpbrk(encoding_start, " \t\f\r\v\n;,", end - encoding_start);
+    const char *encoding_end = yp_strpbrk(parser, encoding_start, " \t\f\r\v\n;,", end - encoding_start);
    encoding_end = encoding_end == NULL ? end : encoding_end;

    // Finally, we can determine the width of the encoding string.
@ -4392,7 +4450,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
    // Extensions like utf-8 can contain extra encoding details like,
    // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
    // treat any encoding starting utf-8 as utf-8.
-    if (strncasecmp(encoding_start, "utf-8", 5) == 0) {
+    if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, "utf-8", 5) == 0)) {
        // We don't need to do anything here because the default encoding is
        // already UTF-8. We'll just return.
        return;
@ -4401,7 +4459,7 @@ parser_lex_encoding_comment(yp_parser_t *parser) {
    // Next, we're going to loop through each of the encodings that we handle
    // explicitly. If we found one that we understand, we'll use that value.
 #define ENCODING(value, prebuilt) \
-    if (width == sizeof(value) - 1 && strncasecmp(encoding_start, value, sizeof(value) - 1) == 0) { \
+    if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, value, width) == 0) { \
        parser->encoding = prebuilt; \
        parser->encoding_changed |= true; \
        if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@ -4866,7 +4924,8 @@ static yp_token_type_t
 lex_keyword(yp_parser_t *parser, const char *value, yp_lex_state_t state, yp_token_type_t type, yp_token_type_t modifier_type) {
    yp_lex_state_t last_state = parser->lex_state;

-    if (strncmp(parser->current.start, value, strlen(value)) == 0) {
+    const size_t vlen = strlen(value);
+    if (parser->current.start + vlen <= parser->end && strncmp(parser->current.start, value, vlen) == 0) {
        if (parser->lex_state & YP_LEX_STATE_FNAME) {
            lex_state_set(parser, YP_LEX_STATE_ENDFN);
        } else {
@ -5275,7 +5334,7 @@ parser_comment(yp_parser_t *parser, yp_comment_type_t type) {
 static yp_token_type_t
 lex_embdoc(yp_parser_t *parser) {
    // First, lex out the EMBDOC_BEGIN token.
-    const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+    const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);

    if (newline == NULL) {
        parser->current.end = parser->end;
@ -5300,7 +5359,7 @@ lex_embdoc(yp_parser_t *parser) {
        // token here.
        if (strncmp(parser->current.end, "=end", 4) == 0 &&
                (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) {
-            const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+            const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);

            if (newline == NULL) {
                parser->current.end = parser->end;
@ -5320,7 +5379,7 @@ lex_embdoc(yp_parser_t *parser) {

        // Otherwise, we'll parse until the end of the line and return a line of
        // embedded documentation.
-        const char *newline = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+        const char *newline = next_newline(parser->current.end, parser->end - parser->current.end);

        if (newline == NULL) {
            parser->current.end = parser->end;
@ -5466,9 +5525,9 @@ parser_lex(yp_parser_t *parser) {
                    LEX(YP_TOKEN_EOF);

                case '#': { // comments
-                    const char *ending = memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+                    const char *ending = next_newline(parser->current.end, parser->end - parser->current.end);
                    while (ending && ending < parser->end && *ending != '\n') {
-                        ending = memchr(ending + 1, '\n', (size_t) (parser->end - ending));
+                        ending = next_newline(ending + 1, parser->end - ending);
                    }

                    parser->current.end = ending == NULL ? parser->end : ending + 1;
@ -5540,7 +5599,7 @@ parser_lex(yp_parser_t *parser) {
                        // Otherwise we'll return a regular newline.
                        if (next_content[0] == '#') {
                            // Here we look for a "." or "&." following a "\n".
-                            const char *following = memchr(next_content, '\n', (size_t) (parser->end - next_content));
+                            const char *following = next_newline(next_content, parser->end - next_content);

                            while (following && (following < parser->end)) {
                                following++;
@ -5552,7 +5611,7 @@ parser_lex(yp_parser_t *parser) {

                                // If there is a comment, then we need to find the end of the
                                // comment and continue searching from there.
-                                following = memchr(following, '\n', (size_t) (parser->end - following));
+                                following = next_newline(following, parser->end - following);
                            }

                            // If the lex state was ignored, or we hit a '.' or a '&.',
@ -5785,7 +5844,7 @@ parser_lex(yp_parser_t *parser) {

                // = => =~ == === =begin
                case '=':
-                    if (current_token_starts_line(parser) && strncmp(parser->current.end, "begin", 5) == 0 && yp_char_is_whitespace(parser->current.end[5])) {
+                    if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
                        yp_token_type_t type = lex_embdoc(parser);

                        if (type == YP_TOKEN_EOF) {
@ -5848,19 +5907,21 @@ parser_lex(yp_parser_t *parser) {
                            const char *ident_start = parser->current.end;
                            size_t width = 0;

-                            if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
+                            if (parser->current.end >= parser->end) {
+                                parser->current.end = end;
+                            } else if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
                                parser->current.end = end;
                            } else {
                                if (quote == YP_HEREDOC_QUOTE_NONE) {
                                    parser->current.end += width;

-                                    while ((width = char_is_identifier(parser, parser->current.end))) {
+                                    while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
                                        parser->current.end += width;
                                    }
                                } else {
                                    // If we have quotes, then we're going to go until we find the
                                    // end quote.
-                                    while (parser->current.end < parser->end && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
+                                    while ((parser->current.end < parser->end) && quote != (yp_heredoc_quote_t) (*parser->current.end)) {
                                        parser->current.end++;
                                    }
                                }
@ -5882,7 +5943,7 @@ parser_lex(yp_parser_t *parser) {
                                });

                                if (parser->heredoc_end == NULL) {
-                                    const char *body_start = (const char *) memchr(parser->current.end, '\n', (size_t) (parser->end - parser->current.end));
+                                    const char *body_start = next_newline(parser->current.end, parser->end - parser->current.end);

                                    if (body_start == NULL) {
                                        // If there is no newline after the heredoc identifier, then
@ -6465,13 +6526,13 @@ parser_lex(yp_parser_t *parser) {
            // Here we'll get a list of the places where strpbrk should break,
            // and then find the first one.
            const char *breakpoints = parser->lex_modes.current->as.list.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);

            while (breakpoint != NULL) {
                switch (*breakpoint) {
                    case '\0':
                        // If we hit a null byte, skip directly past it.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    case '\\': {
                        // If we hit escapes, then we need to treat the next token
@ -6492,7 +6553,7 @@ parser_lex(yp_parser_t *parser) {
                            yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
                        }

-                        breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                        breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                        break;
                    }
                    case ' ':
@ -6517,7 +6578,7 @@ parser_lex(yp_parser_t *parser) {
                            // that looked like an interpolated class or instance variable
                            // like "#@" but wasn't actually. In this case we'll just skip
                            // to the next breakpoint.
-                            breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+                            breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                            break;
                        }
                    }
@ -6526,7 +6587,7 @@ parser_lex(yp_parser_t *parser) {
                        if (*breakpoint == parser->lex_modes.current->as.list.incrementor) {
                            // If we've hit the incrementor, then we need to skip past it and
                            // find the next breakpoint.
-                            breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                            breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                            parser->lex_modes.current->as.list.nesting++;
                            break;
                        }
@ -6537,7 +6598,7 @@ parser_lex(yp_parser_t *parser) {
                        // If this terminator doesn't actually close the list, then we need
                        // to continue on past it.
                        if (parser->lex_modes.current->as.list.nesting > 0) {
-                            breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                            breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                            parser->lex_modes.current->as.list.nesting--;
                            break;
                        }
@ -6577,13 +6638,13 @@ parser_lex(yp_parser_t *parser) {
            // regular expression. We'll use strpbrk to find the first of these
            // characters.
            const char *breakpoints = parser->lex_modes.current->as.regexp.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);

            while (breakpoint != NULL) {
                switch (*breakpoint) {
                    case '\0':
                        // If we hit a null byte, skip directly past it.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    case '\\': {
                        // If we hit escapes, then we need to treat the next token
@ -6597,7 +6658,7 @@ parser_lex(yp_parser_t *parser) {
                            yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
                        }

-                        breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                        breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                        break;
                    }
                    case '#': {
@ -6613,7 +6674,7 @@ parser_lex(yp_parser_t *parser) {
                            // that looked like an interpolated class or instance variable
                            // like "#@" but wasn't actually. In this case we'll just skip
                            // to the next breakpoint.
-                            breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+                            breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                            break;
                        }
                    }
@ -6622,7 +6683,7 @@ parser_lex(yp_parser_t *parser) {
                        if (*breakpoint == parser->lex_modes.current->as.regexp.incrementor) {
                            // If we've hit the incrementor, then we need to skip past it and
                            // find the next breakpoint.
-                            breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                            breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                            parser->lex_modes.current->as.regexp.nesting++;
                            break;
                        }
@ -6635,7 +6696,7 @@ parser_lex(yp_parser_t *parser) {
                            if (parser->lex_modes.current->as.regexp.terminator != '\n') {
                                // If the terminator is not a newline, then we
                                // can set the next breakpoint and continue.
-                                breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                                breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                                break;
                            }

@ -6646,7 +6707,7 @@ parser_lex(yp_parser_t *parser) {
                        assert(*breakpoint == parser->lex_modes.current->as.regexp.terminator);

                        if (parser->lex_modes.current->as.regexp.nesting > 0) {
-                            breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                            breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                            parser->lex_modes.current->as.regexp.nesting--;
                            break;
                        }
@ -6694,7 +6755,7 @@ parser_lex(yp_parser_t *parser) {
            // These are the places where we need to split up the content of the
            // string. We'll use strpbrk to find the first of these characters.
            const char *breakpoints = parser->lex_modes.current->as.string.breakpoints;
-            const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);

            while (breakpoint != NULL) {
                // If we hit the incrementor, then we'll increment then nesting and
@ -6704,7 +6765,7 @@ parser_lex(yp_parser_t *parser) {
                    *breakpoint == parser->lex_modes.current->as.string.incrementor
                ) {
                    parser->lex_modes.current->as.string.nesting++;
-                    breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                    breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                    continue;
                }

@ -6715,7 +6776,7 @@ parser_lex(yp_parser_t *parser) {
                    // If this terminator doesn't actually close the string, then we need
                    // to continue on past it.
                    if (parser->lex_modes.current->as.string.nesting > 0) {
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        parser->lex_modes.current->as.string.nesting--;
                        continue;
                    }
@ -6762,7 +6823,7 @@ parser_lex(yp_parser_t *parser) {
                if (*breakpoint == '\n') {
                    if (parser->heredoc_end == NULL) {
                        yp_newline_list_append(&parser->newline_list, breakpoint);
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        continue;
                    } else {
                        parser->current.end = breakpoint + 1;
@ -6774,7 +6835,7 @@ parser_lex(yp_parser_t *parser) {
                switch (*breakpoint) {
                    case '\0':
                        // Skip directly past the null character.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    case '\\': {
                        // If we hit escapes, then we need to treat the next token
@ -6789,7 +6850,7 @@ parser_lex(yp_parser_t *parser) {
                            yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
                        }

-                        breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                        breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                        break;
                    }
                    case '#': {
@ -6802,7 +6863,7 @@ parser_lex(yp_parser_t *parser) {
                        // looked like an interpolated class or instance variable like "#@"
                        // but wasn't actually. In this case we'll just skip to the next
                        // breakpoint.
-                        breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+                        breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                        break;
                    }
                    default:
@ -6844,7 +6905,7 @@ parser_lex(yp_parser_t *parser) {
                    start += yp_strspn_inline_whitespace(start, parser->end - start);
                }

-                if (strncmp(start, ident_start, ident_length) == 0) {
+                if ((start + ident_length <= parser->end) && (strncmp(start, ident_start, ident_length) == 0)) {
                    bool matched = true;
                    bool at_end = false;

@ -6888,13 +6949,13 @@ parser_lex(yp_parser_t *parser) {
                breakpoints[2] = '\0';
            }

-            const char *breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+            const char *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);

            while (breakpoint != NULL) {
                switch (*breakpoint) {
                    case '\0':
                        // Skip directly past the null character.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    case '\n': {
                        yp_newline_list_append(&parser->newline_list, breakpoint);
@ -6939,7 +7000,7 @@ parser_lex(yp_parser_t *parser) {

                        // Otherwise we hit a newline and it wasn't followed by a
                        // terminator, so we can continue parsing.
-                        breakpoint = yp_strpbrk(breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
                        break;
                    }
                    case '\\': {
@ -6956,7 +7017,7 @@ parser_lex(yp_parser_t *parser) {
                                yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
                            }

-                            breakpoint = yp_strpbrk(breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                            breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
                        }
                        break;
                    }
@ -6970,7 +7031,7 @@ parser_lex(yp_parser_t *parser) {
                        // that looked like an interpolated class or instance variable
                        // like "#@" but wasn't actually. In this case we'll just skip
                        // to the next breakpoint.
-                        breakpoint = yp_strpbrk(parser->current.end, breakpoints, parser->end - parser->current.end);
+                        breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                        break;
                    }
                    default:
@ -7007,7 +7068,7 @@ yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_tok
    ptrdiff_t length = content->end - content->start;
    assert(length >= 0);

-    yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
+    yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
 }

@ -7018,7 +7079,7 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
    ptrdiff_t length = content->end - content->start;
    assert(length >= 0);

-    yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
+    yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
 }

@ -7029,7 +7090,7 @@ yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
    ptrdiff_t length = content->end - content->start;
    assert(length >= 0);

-    yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
+    yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, unescape_type, &parser->error_list);
    return node;
 }

@ -7040,7 +7101,7 @@ yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openi
    ptrdiff_t length = content->end - content->start;
    assert(length >= 0);

-    yp_unescape_manipulate_string(content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
+    yp_unescape_manipulate_string(parser, content->start, (size_t) length, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
    return node;
 }

@ -7505,10 +7566,10 @@ parse_target(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_no
                // the previous method name in, and append an =.
                size_t length = yp_string_length(&call->name);

-                char *name = malloc(length + 2);
+                char *name = calloc(length + 2, sizeof(char));
                if (name == NULL) return NULL;

-                sprintf(name, "%.*s=", (int) length, yp_string_source(&call->name));
+                yp_snprintf(name, length + 2, "%.*s=", (int) length, yp_string_source(&call->name));

                // Now switch the name to the new string.
                yp_string_free(&call->name);
@ -8954,9 +9015,11 @@ parse_string_part(yp_parser_t *parser) {

 static yp_node_t *
 parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) {
+    bool lex_string = lex_mode->mode == YP_LEX_STRING;
+    bool lex_interpolation = lex_string && lex_mode->as.string.interpolation;
    yp_token_t opening = parser->previous;

-    if (lex_mode->mode != YP_LEX_STRING) {
+    if (!lex_string) {
        if (next_state != YP_LEX_STATE_NONE) {
            lex_state_set(parser, next_state);
        }
@ -8990,9 +9053,9 @@ parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_s
    }

    // If we weren't in a string in the previous check then we have to be now.
-    assert(lex_mode->mode == YP_LEX_STRING);
+    assert(lex_string);

-    if (lex_mode->as.string.interpolation) {
+    if (lex_interpolation) {
        yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &opening);

        while (!match_any_type_p(parser, 2, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) {
@ -9043,9 +9106,10 @@ parse_undef_argument(yp_parser_t *parser) {
            return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
        }
        case YP_TOKEN_SYMBOL_BEGIN: {
-            yp_lex_mode_t *lex_mode = parser->lex_modes.current;
+            yp_lex_mode_t lex_mode = *parser->lex_modes.current;
            parser_lex(parser);
-            return parse_symbol(parser, lex_mode, YP_LEX_STATE_NONE);
+
+            return parse_symbol(parser, &lex_mode, YP_LEX_STATE_NONE);
        }
        default:
            yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Expected a bare word or symbol argument.");
@ -9075,10 +9139,10 @@ parse_alias_argument(yp_parser_t *parser, bool first) {
            return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL);
        }
        case YP_TOKEN_SYMBOL_BEGIN: {
-            yp_lex_mode_t *lex_mode = parser->lex_modes.current;
+            yp_lex_mode_t lex_mode = *parser->lex_modes.current;
            parser_lex(parser);

-            return parse_symbol(parser, lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
+            return parse_symbol(parser, &lex_mode, first ? YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE);
        }
        case YP_TOKEN_BACK_REFERENCE:
            parser_lex(parser);
@ -9177,7 +9241,7 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
                    common_whitespace = cur_whitespace;
                }

-                cur_char = memchr(cur_char + 1, '\n', (size_t) (parser->end - (cur_char + 1)));
+                cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
                if (cur_char) cur_char++;
            }
        }
@ -9252,7 +9316,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu

            // At this point we have dedented all that we need to, so we need to find
            // the next newline.
-            const char *breakpoint = memchr(source_cursor, '\n', (size_t) (source_end - source_cursor));
+            const char *breakpoint = next_newline(source_cursor, source_end - source_cursor);

            if (breakpoint == NULL) {
                // If there isn't another newline, then we can just move the rest of the
@ -9293,92 +9357,106 @@ parse_pattern_constant_path(yp_parser_t *parser, yp_node_t *node) {
    // If there is a [ or ( that follows, then this is part of a larger pattern
    // expression. We'll parse the inner pattern here, then modify the returned
    // inner pattern with our constant path attached.
-    if (match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
-        yp_token_t opening;
-        yp_token_t closing;
-        yp_node_t *inner = NULL;
-
-        if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
-            opening = parser->previous;
-
-            accept(parser, YP_TOKEN_NEWLINE);
-
-            if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
-                inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
-                accept(parser, YP_TOKEN_NEWLINE);
-
-                expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
-            }
-
-            closing = parser->previous;
-        } else {
-            parser_lex(parser);
-            opening = parser->previous;
-
-            if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
-                inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
-                expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
-            }
-
-            closing = parser->previous;
-        }
-
-        if (inner) {
-            // Now that we have the inner pattern, check to see if it's an array, find,
-            // or hash pattern. If it is, then we'll attach our constant path to it. If
-            // it's not, then we'll create an array pattern.
-            switch (inner->type) {
-                case YP_NODE_ARRAY_PATTERN_NODE: {
-                    yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *)inner;
-                    pattern_node->base.location.start = node->location.start;
-                    pattern_node->base.location.end = closing.end;
-
-                    pattern_node->constant = node;
-                    pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
-                    pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
-
-                    node = (yp_node_t *)pattern_node;
-                    break;
-                }
-                case YP_NODE_FIND_PATTERN_NODE: {
-                    yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
-                    pattern_node->base.location.start = node->location.start;
-                    pattern_node->base.location.end = closing.end;
-
-                    pattern_node->constant = node;
-                    pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
-                    pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
-
-                    node = (yp_node_t *) pattern_node;
-                    break;
-                }
-                case YP_NODE_HASH_PATTERN_NODE: {
-                    yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *)inner;
-                    pattern_node->base.location.start = node->location.start;
-                    pattern_node->base.location.end = closing.end;
-
-                    pattern_node->constant = node;
-                    pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
-                    pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
-
-                    node = (yp_node_t *) pattern_node;
-                    break;
-                }
-                default: {
-                    yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
-                    yp_array_pattern_node_requireds_append(pattern_node, inner);
-                    node = (yp_node_t *)pattern_node;
-                    break;
-                }
-            }
-        } else {
-            // If there was no inner pattern, then we have something like Foo() or
-            // Foo[]. In that case we'll create an array pattern with no requireds.
-            node = (yp_node_t *)yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
-        }
+    if (!match_any_type_p(parser, 2, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) {
+        return node;
    }

-    return node;
+    yp_token_t opening;
+    yp_token_t closing;
+    yp_node_t *inner = NULL;
+
+    if (accept(parser, YP_TOKEN_BRACKET_LEFT)) {
+        opening = parser->previous;
+        accept(parser, YP_TOKEN_NEWLINE);
+
+        if (!accept(parser, YP_TOKEN_BRACKET_RIGHT)) {
+            inner = parse_pattern(parser, true, "Expected a pattern expression after the [ operator.");
+            accept(parser, YP_TOKEN_NEWLINE);
+            expect(parser, YP_TOKEN_BRACKET_RIGHT, "Expected a ] to close the pattern expression.");
+        }
+
+        closing = parser->previous;
+    } else {
+        parser_lex(parser);
+        opening = parser->previous;
+
+        if (!accept(parser, YP_TOKEN_PARENTHESIS_RIGHT)) {
+            inner = parse_pattern(parser, true, "Expected a pattern expression after the ( operator.");
+            expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected a ) to close the pattern expression.");
+        }
+
+        closing = parser->previous;
+    }
+
+    if (!inner) {
+        // If there was no inner pattern, then we have something like Foo() or
+        // Foo[]. In that case we'll create an array pattern with no requireds.
+        return (yp_node_t *) yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
+    }
+
+    // Now that we have the inner pattern, check to see if it's an array, find,
+    // or hash pattern. If it is, then we'll attach our constant path to it if
+    // it doesn't already have a constant. If it's not one of those node types
+    // or it does have a constant, then we'll create an array pattern.
+    switch (inner->type) {
+        case YP_NODE_ARRAY_PATTERN_NODE: {
+            yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner;
+
+            if (pattern_node->constant == NULL) {
+                pattern_node->base.location.start = node->location.start;
+                pattern_node->base.location.end = closing.end;
+
+                pattern_node->constant = node;
+                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
+                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
+
+                return (yp_node_t *) pattern_node;
+            }
+
+            break;
+        }
+        case YP_NODE_FIND_PATTERN_NODE: {
+            yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner;
+
+            if (pattern_node->constant == NULL) {
+                pattern_node->base.location.start = node->location.start;
+                pattern_node->base.location.end = closing.end;
+
+                pattern_node->constant = node;
+                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
+                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
+
+                return (yp_node_t *) pattern_node;
+            }
+
+            break;
+        }
+        case YP_NODE_HASH_PATTERN_NODE: {
+            yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *) inner;
+
+            if (pattern_node->constant == NULL) {
+                pattern_node->base.location.start = node->location.start;
+                pattern_node->base.location.end = closing.end;
+
+                pattern_node->constant = node;
+                pattern_node->opening_loc = (yp_location_t) { .start = opening.start, .end = opening.end };
+                pattern_node->closing_loc = (yp_location_t) { .start = closing.start, .end = closing.end };
+
+                return (yp_node_t *) pattern_node;
+            }
+
+            break;
+        }
+        default:
+            break;
+    }
+
+    // If we got here, then we didn't return one of the inner patterns by
+    // attaching its constant. In this case we'll create an array pattern and
+    // attach our constant to it.
+    yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing);
+    yp_array_pattern_node_requireds_append(pattern_node, inner);
+    return (yp_node_t *) pattern_node;
 }

 // Parse a rest pattern.
@ -9897,8 +9975,6 @@ parse_pattern(yp_parser_t *parser, bool top_pattern, const char *message) {
 // Parse an expression that begins with the previous node that we just lexed.
 static inline yp_node_t *
 parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
-    yp_lex_mode_t *lex_mode = parser->lex_modes.current;
-
    switch (parser->current.type) {
        case YP_TOKEN_BRACKET_LEFT_ARRAY: {
            parser_lex(parser);
@ -11015,7 +11091,10 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                    lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM);
                    parser_lex(parser);
                    name = parse_undef_argument(parser);
-                    if (name->type == YP_NODE_MISSING_NODE) break;
+                    if (name->type == YP_NODE_MISSING_NODE) {
+                        yp_node_destroy(parser, name);
+                        break;
+                    }

                    yp_undef_node_append(undef, name);
                }
@ -11043,6 +11122,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                    receiver = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, "Expected expression after `not`.");

                    if (!parser->recovering) {
+                        accept(parser, YP_TOKEN_NEWLINE);
                        expect(parser, YP_TOKEN_PARENTHESIS_RIGHT, "Expected ')' after 'not' expression.");
                        arguments.closing_loc = ((yp_location_t) { .start = parser->previous.start, .end = parser->previous.end });
                    }
@ -11727,9 +11807,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
            return (yp_node_t *) node;
        }
        case YP_TOKEN_STRING_BEGIN: {
+            assert(parser->lex_modes.current->mode == YP_LEX_STRING);
+            bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
+
+            yp_token_t opening = parser->current;
            parser_lex(parser);

-            yp_token_t opening = parser->previous;
            yp_node_t *node;

            if (accept(parser, YP_TOKEN_STRING_END)) {
@ -11754,7 +11837,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                };

                return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
-            } else if (!lex_mode->as.string.interpolation) {
+            } else if (!lex_interpolation) {
                // If we don't accept interpolation then we expect the string to start
                // with a single string content node.
                expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
@ -11858,9 +11941,12 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
                return node;
            }
        }
-        case YP_TOKEN_SYMBOL_BEGIN:
+        case YP_TOKEN_SYMBOL_BEGIN: {
+            yp_lex_mode_t lex_mode = *parser->lex_modes.current;
            parser_lex(parser);
-            return parse_symbol(parser, lex_mode, YP_LEX_STATE_END);
+
+            return parse_symbol(parser, &lex_mode, YP_LEX_STATE_END);
+        }
        default:
            if (context_recoverable(parser, &parser->current)) {
                parser->recovering = true;
@ -12482,82 +12568,8 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t

                    return path;
                }
-                case YP_TOKEN_AMPERSAND:
-                case YP_TOKEN_BACKTICK:
-                case YP_TOKEN_BANG:
-                case YP_TOKEN_BANG_EQUAL:
-                case YP_TOKEN_BANG_TILDE:
-                case YP_TOKEN_CARET:
-                case YP_TOKEN_EQUAL_EQUAL:
-                case YP_TOKEN_EQUAL_EQUAL_EQUAL:
-                case YP_TOKEN_EQUAL_TILDE:
-                case YP_TOKEN_GREATER:
-                case YP_TOKEN_GREATER_EQUAL:
-                case YP_TOKEN_GREATER_GREATER:
-                case YP_TOKEN_HEREDOC_START:
-                case YP_TOKEN_IGNORED_NEWLINE:
-                case YP_TOKEN_KEYWORD_ALIAS:
-                case YP_TOKEN_KEYWORD_AND:
-                case YP_TOKEN_KEYWORD_BEGIN:
-                case YP_TOKEN_KEYWORD_BEGIN_UPCASE:
-                case YP_TOKEN_KEYWORD_BREAK:
-                case YP_TOKEN_KEYWORD_CASE:
-                case YP_TOKEN_KEYWORD_CLASS:
-                case YP_TOKEN_KEYWORD_DEF:
-                case YP_TOKEN_KEYWORD_DEFINED:
-                case YP_TOKEN_KEYWORD_DO:
-                case YP_TOKEN_KEYWORD_ELSE:
-                case YP_TOKEN_KEYWORD_ELSIF:
-                case YP_TOKEN_KEYWORD_END:
-                case YP_TOKEN_KEYWORD_END_UPCASE:
-                case YP_TOKEN_KEYWORD_ENSURE:
-                case YP_TOKEN_KEYWORD_FALSE:
-                case YP_TOKEN_KEYWORD_FOR:
-                case YP_TOKEN_KEYWORD_IF:
-                case YP_TOKEN_KEYWORD_IN:
-                case YP_TOKEN_KEYWORD_NEXT:
-                case YP_TOKEN_KEYWORD_NIL:
-                case YP_TOKEN_KEYWORD_NOT:
-                case YP_TOKEN_KEYWORD_OR:
-                case YP_TOKEN_KEYWORD_REDO:
-                case YP_TOKEN_KEYWORD_RESCUE:
-                case YP_TOKEN_KEYWORD_RETRY:
-                case YP_TOKEN_KEYWORD_RETURN:
-                case YP_TOKEN_KEYWORD_SELF:
-                case YP_TOKEN_KEYWORD_SUPER:
-                case YP_TOKEN_KEYWORD_THEN:
-                case YP_TOKEN_KEYWORD_TRUE:
-                case YP_TOKEN_KEYWORD_UNDEF:
-                case YP_TOKEN_KEYWORD_UNLESS:
-                case YP_TOKEN_KEYWORD_UNTIL:
-                case YP_TOKEN_KEYWORD_WHEN:
-                case YP_TOKEN_KEYWORD_WHILE:
-                case YP_TOKEN_KEYWORD_YIELD:
-                case YP_TOKEN_KEYWORD___ENCODING__:
-                case YP_TOKEN_KEYWORD___FILE__:
-                case YP_TOKEN_KEYWORD___LINE__:
-                case YP_TOKEN_LESS:
-                case YP_TOKEN_LESS_EQUAL:
-                case YP_TOKEN_LESS_EQUAL_GREATER:
-                case YP_TOKEN_LESS_LESS:
-                case YP_TOKEN_MINUS:
-                case YP_TOKEN_PERCENT:
-                case YP_TOKEN_PERCENT_LOWER_I:
-                case YP_TOKEN_PERCENT_LOWER_W:
-                case YP_TOKEN_PERCENT_LOWER_X:
-                case YP_TOKEN_PERCENT_UPPER_I:
-                case YP_TOKEN_PERCENT_UPPER_W:
-                case YP_TOKEN_PIPE:
-                case YP_TOKEN_PLUS:
-                case YP_TOKEN_REGEXP_BEGIN:
-                case YP_TOKEN_SLASH:
-                case YP_TOKEN_STAR:
-                case YP_TOKEN_STAR_STAR:
-                case YP_TOKEN_TILDE:
-                case YP_TOKEN_UCOLON_COLON:
-                case YP_TOKEN_UDOT_DOT:
-                case YP_TOKEN_UDOT_DOT_DOT:
-                case YP_TOKEN___END__:
+                case YP_CASE_OPERATOR:
+                case YP_CASE_KEYWORD:
                case YP_TOKEN_IDENTIFIER: {
                    parser_lex(parser);

@ -12805,7 +12817,7 @@ yp_parser_init(yp_parser_t *parser, const char *source, size_t size, const char
    } else if (size >= 2 && source[0] == '#' && source[1] == '!') {
        // If the first two bytes of the source are a shebang, then we'll indicate
        // that the encoding comment is at the end of the shebang.
-        const char *encoding_comment_start = memchr(source, '\n', size);
+        const char *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
        if (encoding_comment_start) {
            parser->encoding_comment_start = encoding_comment_start + 1;
        }
@ -12891,6 +12903,3 @@ yp_parse_serialize(const char *source, size_t size, yp_buffer_t *buffer) {
 #undef YP_CASE_KEYWORD
 #undef YP_CASE_OPERATOR
 #undef YP_CASE_WRITABLE
-#undef YP_STRINGIZE
-#undef YP_STRINGIZE0
-#undef YP_VERSION_MACRO
--- a/yarp/yarp.h
+++ b/yarp/yarp.h
@ -2,19 +2,6 @@
 #define YARP_H

 #include "yarp/defines.h"
-
-#include <assert.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <strings.h>
-#endif
-
-#include "yarp/missing.h"
 #include "yarp/ast.h"
 #include "yarp/diagnostic.h"
 #include "yarp/node.h"
@ -24,17 +11,26 @@
 #include "yarp/unescape.h"
 #include "yarp/util/yp_buffer.h"
 #include "yarp/util/yp_char.h"
+#include "yarp/util/yp_memchr.h"
 #include "yarp/util/yp_strpbrk.h"

-#define YP_VERSION_MAJOR 0
-#define YP_VERSION_MINOR 4
-#define YP_VERSION_PATCH 0
+#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef _WIN32
+#include <strings.h>
+#endif

 void yp_serialize_content(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);

 void yp_print_node(yp_parser_t *parser, yp_node_t *node);

-// Returns the YARP version and notably the serialization format
+// The YARP version and the serialization format.
 YP_EXPORTED_FUNCTION const char * yp_version(void);

 // Initialize a parser with the given start and end pointers.
@ -57,20 +53,6 @@ YP_EXPORTED_FUNCTION void yp_parser_free(yp_parser_t *parser);
 // Parse the Ruby source associated with the given parser and return the tree.
 YP_EXPORTED_FUNCTION yp_node_t * yp_parse(yp_parser_t *parser);

-// Deallocate a node and all of its children.
-YP_EXPORTED_FUNCTION void yp_node_destroy(yp_parser_t *parser, struct yp_node *node);
-
-// This struct stores the information gathered by the yp_node_memsize function.
-// It contains both the memory footprint and additionally metadata about the
-// shape of the tree.
-typedef struct {
-    size_t memsize;
-    size_t node_count;
-} yp_memsize_t;
-
-// Calculates the memory footprint of a given node.
-YP_EXPORTED_FUNCTION void yp_node_memsize(yp_node_t *node, yp_memsize_t *memsize);
-
 // Pretty-prints the AST represented by the given node to the given buffer.
 YP_EXPORTED_FUNCTION void yp_prettyprint(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer);