[ruby/prism] Relocation

https://github.com/ruby/prism/commit/5ea6042408
This commit is contained in:
Kevin Newton 2024-10-15 08:37:34 -04:00 committed by git
parent ebfe615a0c
commit 550ae745f6
6 changed files with 844 additions and 0 deletions

View File

@ -24,6 +24,7 @@ module Prism
autoload :Pack, "prism/pack"
autoload :Pattern, "prism/pattern"
autoload :Reflection, "prism/reflection"
autoload :Relocation, "prism/relocation"
autoload :Serialize, "prism/serialize"
autoload :StringQuery, "prism/string_query"
autoload :Translation, "prism/translation"

View File

@ -35,6 +35,7 @@ Gem::Specification.new do |spec|
"docs/parser_translation.md",
"docs/parsing_rules.md",
"docs/releasing.md",
"docs/relocation.md",
"docs/ripper_translation.md",
"docs/ruby_api.md",
"docs/ruby_parser_translation.md",
@ -88,6 +89,7 @@ Gem::Specification.new do |spec|
"lib/prism/polyfill/byteindex.rb",
"lib/prism/polyfill/unpack1.rb",
"lib/prism/reflection.rb",
"lib/prism/relocation.rb",
"lib/prism/serialize.rb",
"lib/prism/string_query.rb",
"lib/prism/translation.rb",
@ -130,6 +132,7 @@ Gem::Specification.new do |spec|
"sig/prism/parse_result.rbs",
"sig/prism/pattern.rbs",
"sig/prism/reflection.rbs",
"sig/prism/relocation.rbs",
"sig/prism/serialize.rbs",
"sig/prism/string_query.rbs",
"sig/prism/visitor.rbs",

504
lib/prism/relocation.rb Normal file
View File

@ -0,0 +1,504 @@
# frozen_string_literal: true
module Prism
# Prism parses deterministically for the same input. This provides a nice
# property that is exposed through the #node_id API on nodes. Effectively this
# means that for the same input, these values will remain consistent every
# time the source is parsed. This means we can reparse the same source and use
# a #node_id value to find the exact same node again.
#
# The Relocation module provides an API around this property. It allows you to
# "save" nodes and locations using a minimal amount of memory (just the
# node_id and a field identifier) and then reify them later.
module Relocation
# An entry in a repository that will lazily reify its values when they are
# first accessed.
class Entry
  # Raised if a value that could potentially be on an entry is missing
  # because it was either not configured on the repository or it has not yet
  # been fetched.
  class MissingValueError < StandardError
  end

  # Initialize a new entry that belongs to the given repository. No values
  # are held until the repository reifies the entry.
  def initialize(repository)
    @repository = repository
    @values = nil
  end

  # Fetch the filepath of the value.
  def filepath
    fetch_value(:filepath)
  end

  # Fetch the start line of the value.
  def start_line
    fetch_value(:start_line)
  end

  # Fetch the end line of the value.
  def end_line
    fetch_value(:end_line)
  end

  # Fetch the start byte offset of the value.
  def start_offset
    fetch_value(:start_offset)
  end

  # Fetch the end byte offset of the value.
  def end_offset
    fetch_value(:end_offset)
  end

  # Fetch the start character offset of the value.
  def start_character_offset
    fetch_value(:start_character_offset)
  end

  # Fetch the end character offset of the value.
  def end_character_offset
    fetch_value(:end_character_offset)
  end

  # Fetch the start code units offset of the value, for the encoding that
  # was configured on the repository.
  def start_code_units_offset
    fetch_value(:start_code_units_offset)
  end

  # Fetch the end code units offset of the value, for the encoding that was
  # configured on the repository.
  def end_code_units_offset
    fetch_value(:end_code_units_offset)
  end

  # Fetch the start byte column of the value.
  def start_column
    fetch_value(:start_column)
  end

  # Fetch the end byte column of the value.
  def end_column
    fetch_value(:end_column)
  end

  # Fetch the start character column of the value.
  def start_character_column
    fetch_value(:start_character_column)
  end

  # Fetch the end character column of the value.
  def end_character_column
    fetch_value(:end_character_column)
  end

  # Fetch the start code units column of the value, for the encoding that
  # was configured on the repository.
  def start_code_units_column
    fetch_value(:start_code_units_column)
  end

  # Fetch the end code units column of the value, for the encoding that was
  # configured on the repository.
  def end_code_units_column
    fetch_value(:end_code_units_column)
  end

  # Fetch the leading comments of the value.
  def leading_comments
    fetch_value(:leading_comments)
  end

  # Fetch the trailing comments of the value.
  def trailing_comments
    fetch_value(:trailing_comments)
  end

  # Fetch the leading comments followed by the trailing comments of the
  # value. A new array is built on every call so that the arrays stored on
  # this entry are never mutated; appending in place would make repeated
  # calls accumulate duplicate trailing comments and would leak them into
  # #leading_comments.
  def comments
    leading_comments + trailing_comments
  end

  # Reify the values on this entry with the given values. This is an
  # internal-only API that is called from the repository when it is time to
  # reify the values. The reference to the repository is dropped since it is
  # no longer needed once the values are present.
  def reify!(values) # :nodoc:
    @repository = nil
    @values = values
  end

  private

  # Fetch a value from the entry, raising MissingValueError if it is missing.
  def fetch_value(name)
    values.fetch(name) do
      raise MissingValueError, "No value for #{name}, make sure the " \
        "repository has been properly configured"
    end
  end

  # Return the values for this entry, asking the repository to reify them
  # first if they have not been set yet.
  def values
    @values || (@repository.reify!; @values)
  end
end
# Represents the source of a repository that will be reparsed.
class Source
  # The value from which the source gets reparsed.
  attr_reader :value

  # Remember the value so that it can be reparsed on demand.
  def initialize(value)
    @value = value
  end

  # Produce a fresh parse result. The base class has no way of doing so and
  # always raises; subclasses provide the real implementation.
  def result
    raise(NotImplementedError, "Subclasses must implement #result")
  end

  # Build a code units cache for the given encoding out of a fresh parse
  # result.
  def code_units_cache(encoding)
    parse_result = result
    parse_result.code_units_cache(encoding)
  end
end
# A source that is represented by a file path.
class SourceFilepath < Source
  # Parse the file located at the stored path again and hand back the parse
  # result.
  def result
    path = value
    Prism.parse_file(path)
  end
end
# A source that is represented by a string.
class SourceString < Source
  # Parse the stored string again and hand back the parse result.
  def result
    code = value
    Prism.parse(code)
  end
end
# A field that represents the file path.
class FilepathField
  # The file path reported by this field.
  attr_reader :value

  # Build the field around the file path it should report.
  def initialize(value)
    @value = value
  end

  # Report the file path. The argument is ignored because the path is the
  # same for every value saved in the repository.
  def fields(_value)
    reported = {}
    reported[:filepath] = value
    reported
  end
end
# A field representing the start and end lines.
class LinesField
  # Read the first and last line off of the given value.
  def fields(value)
    first_line = value.start_line
    last_line = value.end_line

    { start_line: first_line, end_line: last_line }
  end
end
# A field representing the start and end byte offsets.
class OffsetsField
  # Read the byte offsets where the given value begins and ends.
  def fields(value)
    first_byte = value.start_offset
    last_byte = value.end_offset

    { start_offset: first_byte, end_offset: last_byte }
  end
end
# A field representing the start and end character offsets.
class CharacterOffsetsField
  # Read the character offsets where the given value begins and ends.
  def fields(value)
    first_character = value.start_character_offset
    last_character = value.end_character_offset

    { start_character_offset: first_character, end_character_offset: last_character }
  end
end
# A field representing the start and end code unit offsets.
class CodeUnitOffsetsField
  # The repository that is asked for a code units cache the first time one
  # is needed.
  attr_reader :repository

  # The encoding in which code units are counted.
  attr_reader :encoding

  # Build the field for the given repository and encoding. The cache is not
  # created until it is first used.
  def initialize(repository, encoding)
    @repository = repository
    @encoding = encoding
    @cache = nil
  end

  # Read the code unit offsets where the given value begins and ends,
  # measured in the configured encoding.
  def fields(value)
    first_unit = value.cached_start_code_units_offset(cache)
    last_unit = value.cached_end_code_units_offset(cache)

    { start_code_units_offset: first_unit, end_code_units_offset: last_unit }
  end

  private

  # Return the memoized code units cache, requesting it from the repository
  # when it has not been created yet.
  def cache
    return @cache if @cache

    @cache = repository.code_units_cache(encoding)
  end
end
# A field representing the start and end byte columns.
class ColumnsField
  # Read the byte columns where the given value begins and ends.
  def fields(value)
    first_column = value.start_column
    last_column = value.end_column

    { start_column: first_column, end_column: last_column }
  end
end
# A field representing the start and end character columns.
class CharacterColumnsField
  # Read the character columns where the given value begins and ends.
  def fields(value)
    first_column = value.start_character_column
    last_column = value.end_character_column

    { start_character_column: first_column, end_character_column: last_column }
  end
end
# A field representing the start and end code unit columns for a specific
# encoding.
class CodeUnitColumnsField
  # The repository that is asked for a code units cache the first time one
  # is needed.
  attr_reader :repository

  # The encoding in which code units are counted.
  attr_reader :encoding

  # Build the field for the given repository and encoding. The cache is not
  # created until it is first used.
  def initialize(repository, encoding)
    @repository = repository
    @encoding = encoding
    @cache = nil
  end

  # Read the code unit columns where the given value begins and ends,
  # measured in the configured encoding.
  def fields(value)
    first_unit = value.cached_start_code_units_column(cache)
    last_unit = value.cached_end_code_units_column(cache)

    { start_code_units_column: first_unit, end_code_units_column: last_unit }
  end

  private

  # Return the memoized code units cache, requesting it from the repository
  # when it has not been created yet.
  def cache
    return @cache if @cache

    @cache = repository.code_units_cache(encoding)
  end
end
# An abstract field used as the parent class of the two comments fields.
class CommentsField
  # A lightweight stand-in for a comment that keeps only its slice.
  class Comment
    # The slice of the comment.
    attr_reader :slice

    # Build a comment that holds on to the given slice.
    def initialize(slice)
      @slice = slice
    end
  end

  private

  # Wrap the slice of each of the given values in a Comment object.
  def comments(values)
    values.map do |original|
      text = original.slice
      Comment.new(text)
    end
  end
end
# A field representing the leading comments.
class LeadingCommentsField < CommentsField
  # Convert the leading comments of the given value into comment objects.
  def fields(value)
    leading = comments(value.leading_comments)
    { leading_comments: leading }
  end
end
# A field representing the trailing comments.
class TrailingCommentsField < CommentsField
  # Convert the trailing comments of the given value into comment objects.
  def fields(value)
    trailing = comments(value.trailing_comments)
    { trailing_comments: trailing }
  end
end
# A repository is a configured collection of fields and a set of entries
# that knows how to reparse a source and reify the values.
class Repository
  # Raised when the repository is misconfigured, either because the same
  # field was specified more than once or because the filepath field was
  # requested for a source that is not a file path.
  class ConfigurationError < StandardError
  end

  # The source associated with this repository. This will be either a
  # SourceFilepath (the most common use case) or a SourceString.
  attr_reader :source

  # The fields that have been configured on this repository, keyed by name.
  attr_reader :fields

  # The entries that have been saved on this repository and not yet reified,
  # keyed first by node_id and then by field name.
  attr_reader :entries

  # Initialize a new repository with the given source.
  def initialize(source)
    @source = source
    @fields = {}
    @entries = Hash.new { |hash, node_id| hash[node_id] = {} }
  end

  # Create a code units cache for the given encoding from the source.
  def code_units_cache(encoding)
    source.code_units_cache(encoding)
  end

  # Configure the filepath field for this repository and return self. Raises
  # ConfigurationError unless the source is a SourceFilepath.
  def filepath
    raise ConfigurationError, "Can only specify filepath for a filepath source" unless source.is_a?(SourceFilepath)
    field(:filepath, FilepathField.new(source.value))
  end

  # Configure the lines field for this repository and return self.
  def lines
    field(:lines, LinesField.new)
  end

  # Configure the offsets field for this repository and return self.
  def offsets
    field(:offsets, OffsetsField.new)
  end

  # Configure the character offsets field for this repository and return
  # self.
  def character_offsets
    field(:character_offsets, CharacterOffsetsField.new)
  end

  # Configure the code unit offsets field for this repository for a specific
  # encoding and return self.
  def code_unit_offsets(encoding)
    field(:code_unit_offsets, CodeUnitOffsetsField.new(self, encoding))
  end

  # Configure the columns field for this repository and return self.
  def columns
    field(:columns, ColumnsField.new)
  end

  # Configure the character columns field for this repository and return
  # self.
  def character_columns
    field(:character_columns, CharacterColumnsField.new)
  end

  # Configure the code unit columns field for this repository for a specific
  # encoding and return self.
  def code_unit_columns(encoding)
    field(:code_unit_columns, CodeUnitColumnsField.new(self, encoding))
  end

  # Configure the leading comments field for this repository and return
  # self.
  def leading_comments
    field(:leading_comments, LeadingCommentsField.new)
  end

  # Configure the trailing comments field for this repository and return
  # self.
  def trailing_comments
    field(:trailing_comments, TrailingCommentsField.new)
  end

  # Configure both the leading and trailing comment fields for this
  # repository and return self.
  def comments
    leading_comments.trailing_comments
  end

  # This method is called from nodes and locations when they want to enter
  # themselves into the repository. It is internal-only and meant to be
  # called from the #save* APIs.
  #
  # Entering the same node_id and field name again before the repository has
  # been reified returns the entry that is already pending. Replacing it
  # would orphan the earlier entry, which would then never receive values.
  def enter(node_id, field_name) # :nodoc:
    @entries[node_id][field_name] ||= Entry.new(self)
  end

  # This method is called from the entries in the repository when they need
  # to reify their values. It is internal-only and meant to be called from
  # the various value APIs.
  #
  # The source is reparsed and the resulting tree is walked breadth-first.
  # Every pending entry whose node_id matches a visited node receives the
  # merged values of all configured fields, after which the pending entries
  # are discarded.
  def reify! # :nodoc:
    result = source.result

    # Attach the comments if they have been requested as part of the
    # configuration of this repository.
    if fields.key?(:leading_comments) || fields.key?(:trailing_comments)
      result.attach_comments!
    end

    queue = [result.value] #: Array[Prism::node]
    while (node = queue.shift)
      # Use fetch so that the default block of the hash is not triggered,
      # which would otherwise insert an empty hash for every visited node.
      if (pending = @entries.fetch(node.node_id, nil))
        pending.each do |field_name, entry|
          value = node.public_send(field_name)
          values = {}

          fields.each_value do |field|
            values.merge!(field.fields(value))
          end

          entry.reify!(values)
        end
      end

      queue.concat(node.compact_child_nodes)
    end

    @entries.clear
  end

  private

  # Append the given field to the repository and return the repository so
  # that these calls can be chained. Raises ConfigurationError if a field
  # with the same name has already been configured.
  def field(name, value)
    raise ConfigurationError, "Cannot specify multiple #{name} fields" if @fields.key?(name)

    @fields[name] = value
    self
  end
end
# Create a new repository for the given filepath.
def self.filepath(value)
  # Wrap the path in a file-backed source before building the repository.
  source = SourceFilepath.new(value)
  Repository.new(source)
end
# Create a new repository for the given string.
def self.string(value)
  # Wrap the text in a string-backed source before building the repository.
  source = SourceString.new(value)
  Repository.new(source)
end
end
end

View File

@ -12,6 +12,11 @@ module Prism
# will be consistent across multiple parses of the same source code.
attr_reader :node_id
# Save this node in the given repository so that it can be retrieved later.
def save(repository)
  # The :itself field makes the repository hand back the node as a whole.
  id = node_id
  repository.enter(id, :itself)
end
# A Location instance that represents the location of this node in the
# source.
def location
@ -20,6 +25,21 @@ module Prism
@location = Location.new(source, location >> 32, location & 0xFFFFFFFF)
end
# Save the location of this node in the given repository so that it can be retrieved later.
def save_location(repository)
  # The :location field makes the repository read the node's location.
  id = node_id
  repository.enter(id, :location)
end
# Delegates to the start_line of the associated location object.
def start_line
  loc = location
  loc.start_line
end
# Delegates to the end_line of the associated location object.
def end_line
  loc = location
  loc.end_line
end
# The start offset of the node in the source. This method is effectively a
# delegate method to the location object.
def start_offset
@ -34,6 +54,75 @@ module Prism
location.is_a?(Location) ? location.end_offset : ((location >> 32) + (location & 0xFFFFFFFF))
end
# Delegates to the start_character_offset of the associated location object.
def start_character_offset
  loc = location
  loc.start_character_offset
end
# Delegates to the end_character_offset of the associated location object.
def end_character_offset
  loc = location
  loc.end_character_offset
end
# Delegates to the cached_start_code_units_offset of the associated location
# object.
def cached_start_code_units_offset(cache)
  loc = location
  loc.cached_start_code_units_offset(cache)
end
# Delegates to the cached_end_code_units_offset of the associated location
# object.
def cached_end_code_units_offset(cache)
  loc = location
  loc.cached_end_code_units_offset(cache)
end
# Delegates to the start_column of the associated location object.
def start_column
  loc = location
  loc.start_column
end
# Delegates to the end_column of the associated location object.
def end_column
  loc = location
  loc.end_column
end
# Delegates to the start_character_column of the associated location object.
def start_character_column
  loc = location
  loc.start_character_column
end
# Delegates to the end_character_column of the associated location object.
def end_character_column
  loc = location
  loc.end_character_column
end
# Delegates to the cached_start_code_units_column of the associated location
# object.
def cached_start_code_units_column(cache)
  loc = location
  loc.cached_start_code_units_column(cache)
end
# Delegates to the cached_end_code_units_column of the associated location
# object.
def cached_end_code_units_column(cache)
  loc = location
  loc.cached_end_code_units_column(cache)
end
# Delegates to the leading_comments of the associated location object.
def leading_comments
  loc = location
  loc.leading_comments
end
# Delegates to the trailing_comments of the associated location object.
def trailing_comments
  loc = location
  loc.trailing_comments
end
# Delegates to the comments of the associated location object.
def comments
  loc = location
  loc.comments
end
# Returns all of the lines of the source code associated with this node.
def source_lines
location.source_lines
@ -318,6 +407,12 @@ module Prism
return location if location.is_a?(Location)
@<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
end
# Save the <%= field.name %> location in the given repository so that it can
# be retrieved later.
def save_<%= field.name %>(repository)
# Enters this node's node_id under the name of this location field and
# returns whatever repository.enter returns.
repository.enter(node_id, :<%= field.name %>)
end
<%- when Prism::Template::OptionalLocationField -%>
def <%= field.name %>
location = @<%= field.name %>
@ -330,6 +425,12 @@ module Prism
@<%= field.name %> = Location.new(source, location >> 32, location & 0xFFFFFFFF)
end
end
# Save the <%= field.name %> location in the given repository so that it can
# be retrieved later.
def save_<%= field.name %>(repository)
# This location field is optional: when its instance variable is nil,
# nothing is entered into the repository and the method evaluates to nil.
repository.enter(node_id, :<%= field.name %>) unless @<%= field.name %>.nil?
end
<%- else -%>
attr_reader :<%= field.name %>
<%- end -%>

View File

@ -0,0 +1,43 @@
# This script finds the declaration of all classes and modules and stores them
# in a hash for an in-memory database of constants.
require "prism"
class RelocationVisitor < Prism::Visitor
  attr_reader :index, :repository, :scope

  # Remember the shared index, the repository used for saving nodes, and the
  # names of the namespaces that enclose the nodes being visited.
  def initialize(index, repository, scope = [])
    @index = index
    @repository = repository
    @scope = scope
  end

  # Record the class declaration and continue inside of its body.
  def visit_class_node(node)
    record_declaration(node)
  end

  # Record the module declaration and continue inside of its body.
  def visit_module_node(node)
    record_declaration(node)
  end

  private

  # Save the constant path of the declaration under its fully qualified name
  # and visit the body, if there is one, with a visitor whose scope includes
  # the name that was just declared.
  def record_declaration(node)
    name_node = node.constant_path
    nested_scope = scope + name_node.full_name_parts

    index[nested_scope.join("::")] << name_node.save(repository)
    node.body&.accept(RelocationVisitor.new(index, repository, nested_scope))
  end
end
# Create an index that will store a mapping between the names of constants to a
# list of the locations where they are declared or re-opened.
index = Hash.new { |declarations, name| declarations[name] = [] }

# Walk over every Ruby file in the lib directory of this repository. Each file
# gets its own relocation repository, is parsed with Prism, and is then visited
# by a RelocationVisitor that stores the repository entries in the index.
pattern = File.expand_path("../../lib/**/*.rb", __dir__)
Dir.glob(pattern).each do |path|
  repository = Prism::Relocation.filepath(path).filepath.lines.code_unit_columns(Encoding::UTF_16LE)
  program = Prism.parse_file(path).value
  program.accept(RelocationVisitor.new(index, repository))
end

# Print one "path:line:column" line for every declaration of the constant.
locations = index["Prism::ParametersNode"].map do |entry|
  "#{entry.filepath}:#{entry.start_line}:#{entry.start_code_units_column}"
end
puts locations
# =>
# prism/lib/prism/node.rb:13889:8
# prism/lib/prism/node_ext.rb:267:8

View File

@ -0,0 +1,192 @@
# frozen_string_literal: true
require_relative "../test_helper"
module Prism
class RelocationTest < TestCase
# Exercises the Prism::Relocation API: nodes and locations are saved into a
# repository and the resulting entries are compared against expected values.
# Several sources contain an emoji so that the byte, character, and UTF-16
# code unit measurements yield different numbers (13, 10, and 11 for the end
# of the constant below).

# A repository built from this file's path reports the line on which the
# second top-level statement of this file starts.
def test_repository_filepath
repository = Relocation.filepath(__FILE__).lines
declaration = Prism.parse_file(__FILE__).value.statements.body[1]
assert_equal 5, declaration.save(repository).start_line
end
# The filepath field reports the path that the repository was created with.
def test_filepath
repository = Relocation.filepath(__FILE__).filepath
declaration = Prism.parse_file(__FILE__).value.statements.body[1]
assert_equal __FILE__, declaration.save(repository).filepath
end
# Lines are the same whether the node itself or its location was saved.
def test_lines
source = "class Foo😀\nend"
repository = Relocation.string(source).lines
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.save(repository)
location_entry = declaration.save_location(repository)
assert_equal 1, node_entry.start_line
assert_equal 2, node_entry.end_line
assert_equal 1, location_entry.start_line
assert_equal 2, location_entry.end_line
end
# Byte offsets of the constant path of the class.
def test_offsets
source = "class Foo😀\nend"
repository = Relocation.string(source).offsets
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.constant_path.save(repository)
location_entry = declaration.constant_path.save_location(repository)
assert_equal 6, node_entry.start_offset
assert_equal 13, node_entry.end_offset
assert_equal 6, location_entry.start_offset
assert_equal 13, location_entry.end_offset
end
# Character offsets of the constant path of the class.
def test_character_offsets
source = "class Foo😀\nend"
repository = Relocation.string(source).character_offsets
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.constant_path.save(repository)
location_entry = declaration.constant_path.save_location(repository)
assert_equal 6, node_entry.start_character_offset
assert_equal 10, node_entry.end_character_offset
assert_equal 6, location_entry.start_character_offset
assert_equal 10, location_entry.end_character_offset
end
# UTF-16LE code unit offsets of the constant path of the class.
def test_code_unit_offsets
source = "class Foo😀\nend"
repository = Relocation.string(source).code_unit_offsets(Encoding::UTF_16LE)
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.constant_path.save(repository)
location_entry = declaration.constant_path.save_location(repository)
assert_equal 6, node_entry.start_code_units_offset
assert_equal 11, node_entry.end_code_units_offset
assert_equal 6, location_entry.start_code_units_offset
assert_equal 11, location_entry.end_code_units_offset
end
# Byte columns of the constant path of the class.
def test_columns
source = "class Foo😀\nend"
repository = Relocation.string(source).columns
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.constant_path.save(repository)
location_entry = declaration.constant_path.save_location(repository)
assert_equal 6, node_entry.start_column
assert_equal 13, node_entry.end_column
assert_equal 6, location_entry.start_column
assert_equal 13, location_entry.end_column
end
# Character columns of the constant path of the class.
def test_character_columns
source = "class Foo😀\nend"
repository = Relocation.string(source).character_columns
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.constant_path.save(repository)
location_entry = declaration.constant_path.save_location(repository)
assert_equal 6, node_entry.start_character_column
assert_equal 10, node_entry.end_character_column
assert_equal 6, location_entry.start_character_column
assert_equal 10, location_entry.end_character_column
end
# UTF-16LE code unit columns of the constant path of the class.
def test_code_unit_columns
source = "class Foo😀\nend"
repository = Relocation.string(source).code_unit_columns(Encoding::UTF_16LE)
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.constant_path.save(repository)
location_entry = declaration.constant_path.save_location(repository)
assert_equal 6, node_entry.start_code_units_column
assert_equal 11, node_entry.end_code_units_column
assert_equal 6, location_entry.start_code_units_column
assert_equal 11, location_entry.end_code_units_column
end
# A comment on the line before the class is reported as a leading comment.
def test_leading_comments
source = "# leading\nclass Foo\nend"
repository = Relocation.string(source).leading_comments
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.save(repository)
location_entry = declaration.save_location(repository)
assert_equal ["# leading"], node_entry.leading_comments.map(&:slice)
assert_equal ["# leading"], location_entry.leading_comments.map(&:slice)
end
# A comment on the line after the class is reported as a trailing comment.
def test_trailing_comments
source = "class Foo\nend\n# trailing"
repository = Relocation.string(source).trailing_comments
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.save(repository)
location_entry = declaration.save_location(repository)
assert_equal ["# trailing"], node_entry.trailing_comments.map(&:slice)
assert_equal ["# trailing"], location_entry.trailing_comments.map(&:slice)
end
# With both comment fields configured, #comments reports the leading
# comments followed by the trailing comments.
def test_comments
source = "# leading\nclass Foo\nend\n# trailing"
repository = Relocation.string(source).comments
declaration = Prism.parse(source).value.statements.body.first
node_entry = declaration.save(repository)
location_entry = declaration.save_location(repository)
assert_equal ["# leading", "# trailing"], node_entry.comments.map(&:slice)
assert_equal ["# leading", "# trailing"], location_entry.comments.map(&:slice)
end
# Configuring the same field twice must raise. Repository#comments already
# configures both comment fields, so adding either one again is a duplicate.
def test_misconfiguration
assert_raise Relocation::Repository::ConfigurationError do
Relocation.string("").comments.leading_comments
end
assert_raise Relocation::Repository::ConfigurationError do
Relocation.string("").comments.trailing_comments
end
assert_raise Relocation::Repository::ConfigurationError do
Relocation.string("").code_unit_offsets(Encoding::UTF_8).code_unit_offsets(Encoding::UTF_16LE)
end
assert_raise Relocation::Repository::ConfigurationError do
Relocation.string("").lines.lines
end
end
# Only the lines field is configured here, so asking the entry for an
# offset must raise MissingValueError.
def test_missing_values
source = "class Foo; end"
repository = Relocation.string(source).lines
declaration = Prism.parse(source).value.statements.body.first
entry = declaration.constant_path.save(repository)
assert_raise Relocation::Entry::MissingValueError do
entry.start_offset
end
end
end
end