[ruby/prism] Add character APIs for locations

(https://github.com/ruby/prism/pull/1809)

https://github.com/ruby/prism/commit/d493ccd093
This commit is contained in:
Kevin Newton 2023-11-20 11:07:02 -05:00 committed by git
parent adee7dab3e
commit f2ed7eaba0
4 changed files with 83 additions and 23 deletions

View File

@ -230,7 +230,7 @@ module Prism
loader = Serialize::Loader.new(source, buffer.read)
loader.load_header
loader.load_force_encoding
loader.load_encoding
loader.load_start_line
loader.load_comments
end

View File

@ -25,40 +25,50 @@ module Prism
# Perform a byteslice on the source code using the given byte offset and
# byte length.
def slice(offset, length)
source.byteslice(offset, length)
def slice(byte_offset, length)
source.byteslice(byte_offset, length)
end
# Binary search through the offsets to find the line number for the given
# byte offset.
def line(value)
start_line + find_line(value)
def line(byte_offset)
start_line + find_line(byte_offset)
end
# Return the byte offset of the start of the line corresponding to the given
# byte offset.
def line_offset(value)
offsets[find_line(value)]
def line_start(byte_offset)
offsets[find_line(byte_offset)]
end
# Return the column number for the given byte offset.
def column(value)
value - offsets[find_line(value)]
def column(byte_offset)
byte_offset - line_start(byte_offset)
end
# Return the character offset for the given byte offset.
def character_offset(byte_offset)
  # Take everything that precedes the byte offset; String#length then counts
  # that prefix in characters rather than in bytes.
  leading = source.byteslice(0, byte_offset)
  leading.length
end
# Return the column number in characters for the given byte offset.
def character_column(byte_offset)
  # Character offset of the requested position, measured from the start of
  # the source.
  position = character_offset(byte_offset)
  # Character offset of the first byte of the line containing that position.
  line_begin = character_offset(line_start(byte_offset))
  position - line_begin
end
private
# Binary search through the offsets to find the line number for the given
# byte offset.
def find_line(value)
def find_line(byte_offset)
left = 0
right = offsets.length - 1
while left <= right
mid = left + (right - left) / 2
return mid if offsets[mid] == value
return mid if offsets[mid] == byte_offset
if offsets[mid] < value
if offsets[mid] < byte_offset
left = mid + 1
else
right = mid - 1
@ -121,11 +131,23 @@ module Prism
source.slice(start_offset, length)
end
# The character offset from the beginning of the source where this location
# starts.
def start_character_offset
  # The source object converts this location's starting byte offset into a
  # character offset.
  origin = source
  origin.character_offset(start_offset)
end
# The byte offset from the beginning of the source where this location ends.
def end_offset
  # The end is the starting byte offset advanced by the byte length.
  first_byte = start_offset
  first_byte + length
end
# The character offset from the beginning of the source where this location
# ends.
def end_character_offset
  # The source object converts this location's ending byte offset into a
  # character offset.
  origin = source
  origin.character_offset(end_offset)
end
# The line number where this location starts.
def start_line
source.line(start_offset)
@ -133,7 +155,7 @@ module Prism
# The content of the line where this location starts before this location.
def start_line_slice
offset = source.line_offset(start_offset)
offset = source.line_start(start_offset)
source.slice(offset, start_offset - offset)
end
@ -148,12 +170,24 @@ module Prism
source.column(start_offset)
end
# The column number in characters where this location starts from the start of
# the line.
def start_character_column
  # The source object turns this location's starting byte offset into a
  # column counted in characters.
  origin = source
  origin.character_column(start_offset)
end
# The column number in bytes where this location ends from the start of the
# line.
def end_column
  # The source object turns this location's ending byte offset into a column
  # counted in bytes.
  origin = source
  origin.column(end_offset)
end
# The column number in characters where this location ends from the start of
# the line.
def end_character_column
  # The source object turns this location's ending byte offset into a column
  # counted in characters.
  origin = source
  origin.character_column(end_offset)
end
# Implement the hash pattern matching interface for Location.
def deconstruct_keys(keys)
{ start_offset: start_offset, end_offset: end_offset }

View File

@ -73,12 +73,9 @@ module Prism
end
def load_encoding
Encoding.find(io.read(load_varint))
end
def load_force_encoding
@encoding = load_encoding
@encoding = Encoding.find(io.read(load_varint))
@input = input.force_encoding(@encoding).freeze
@encoding
end
def load_start_line
@ -121,10 +118,7 @@ module Prism
encoding = load_encoding
load_start_line
comments, magic_comments, errors, warnings = load_metadata
if encoding != @encoding
tokens.each { |token,| token.value.force_encoding(encoding) }
end
tokens.each { |token,| token.value.force_encoding(encoding) }
raise "Expected to consume all bytes while deserializing" unless @io.eof?
Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
@ -132,7 +126,7 @@ module Prism
def load_nodes
load_header
load_force_encoding
load_encoding
load_start_line
comments, magic_comments, errors, warnings = load_metadata

View File

@ -71,6 +71,38 @@ module Prism
end
end
def test_location_character_offsets
  # Each emoji is one character but several bytes, so character offsets and
  # columns differ from their byte-based counterparts.
  program = Prism.parse("😀 + 😀\n😍 ||= 😍").value
  statements = program.statements.body

  # Checks the character offsets first, then the character columns, each
  # given as a [start, end] pair.
  verify = lambda do |location, offsets, columns|
    assert_equal offsets.first, location.start_character_offset
    assert_equal offsets.last, location.end_character_offset
    assert_equal columns.first, location.start_character_column
    assert_equal columns.last, location.end_character_column
  end

  # first 😀
  verify.call(statements.first.receiver.location, [0, 1], [0, 1])

  # second 😀
  verify.call(statements.first.arguments.arguments.first.location, [4, 5], [4, 5])

  # first 😍
  verify.call(statements.last.name_loc, [6, 7], [0, 1])

  # second 😍
  verify.call(statements.last.value.location, [12, 13], [6, 7])
end
private
def parse_expression(source)