[ruby/prism] Add character APIs for locations
(https://github.com/ruby/prism/pull/1809) https://github.com/ruby/prism/commit/d493ccd093
This commit is contained in:
parent
adee7dab3e
commit
f2ed7eaba0
@ -230,7 +230,7 @@ module Prism
|
|||||||
loader = Serialize::Loader.new(source, buffer.read)
|
loader = Serialize::Loader.new(source, buffer.read)
|
||||||
|
|
||||||
loader.load_header
|
loader.load_header
|
||||||
loader.load_force_encoding
|
loader.load_encoding
|
||||||
loader.load_start_line
|
loader.load_start_line
|
||||||
loader.load_comments
|
loader.load_comments
|
||||||
end
|
end
|
||||||
|
@ -25,40 +25,50 @@ module Prism
|
|||||||
|
|
||||||
# Perform a byteslice on the source code using the given byte offset and
|
# Perform a byteslice on the source code using the given byte offset and
|
||||||
# byte length.
|
# byte length.
|
||||||
def slice(offset, length)
|
def slice(byte_offset, length)
|
||||||
source.byteslice(offset, length)
|
source.byteslice(byte_offset, length)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Binary search through the offsets to find the line number for the given
|
# Binary search through the offsets to find the line number for the given
|
||||||
# byte offset.
|
# byte offset.
|
||||||
def line(value)
|
def line(byte_offset)
|
||||||
start_line + find_line(value)
|
start_line + find_line(byte_offset)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Return the byte offset of the start of the line corresponding to the given
|
# Return the byte offset of the start of the line corresponding to the given
|
||||||
# byte offset.
|
# byte offset.
|
||||||
def line_offset(value)
|
def line_start(byte_offset)
|
||||||
offsets[find_line(value)]
|
offsets[find_line(byte_offset)]
|
||||||
end
|
end
|
||||||
|
|
||||||
# Return the column number for the given byte offset.
|
# Return the column number for the given byte offset.
|
||||||
def column(value)
|
def column(byte_offset)
|
||||||
value - offsets[find_line(value)]
|
byte_offset - line_start(byte_offset)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Return the character offset for the given byte offset.
|
||||||
|
def character_offset(byte_offset)
|
||||||
|
source.byteslice(0, byte_offset).length
|
||||||
|
end
|
||||||
|
|
||||||
|
# Return the column number in characters for the given byte offset.
|
||||||
|
def character_column(byte_offset)
|
||||||
|
character_offset(byte_offset) - character_offset(line_start(byte_offset))
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
# Binary search through the offsets to find the line number for the given
|
# Binary search through the offsets to find the line number for the given
|
||||||
# byte offset.
|
# byte offset.
|
||||||
def find_line(value)
|
def find_line(byte_offset)
|
||||||
left = 0
|
left = 0
|
||||||
right = offsets.length - 1
|
right = offsets.length - 1
|
||||||
|
|
||||||
while left <= right
|
while left <= right
|
||||||
mid = left + (right - left) / 2
|
mid = left + (right - left) / 2
|
||||||
return mid if offsets[mid] == value
|
return mid if offsets[mid] == byte_offset
|
||||||
|
|
||||||
if offsets[mid] < value
|
if offsets[mid] < byte_offset
|
||||||
left = mid + 1
|
left = mid + 1
|
||||||
else
|
else
|
||||||
right = mid - 1
|
right = mid - 1
|
||||||
@ -121,11 +131,23 @@ module Prism
|
|||||||
source.slice(start_offset, length)
|
source.slice(start_offset, length)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# The character offset from the beginning of the source where this location
|
||||||
|
# starts.
|
||||||
|
def start_character_offset
|
||||||
|
source.character_offset(start_offset)
|
||||||
|
end
|
||||||
|
|
||||||
# The byte offset from the beginning of the source where this location ends.
|
# The byte offset from the beginning of the source where this location ends.
|
||||||
def end_offset
|
def end_offset
|
||||||
start_offset + length
|
start_offset + length
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# The character offset from the beginning of the source where this location
|
||||||
|
# ends.
|
||||||
|
def end_character_offset
|
||||||
|
source.character_offset(end_offset)
|
||||||
|
end
|
||||||
|
|
||||||
# The line number where this location starts.
|
# The line number where this location starts.
|
||||||
def start_line
|
def start_line
|
||||||
source.line(start_offset)
|
source.line(start_offset)
|
||||||
@ -133,7 +155,7 @@ module Prism
|
|||||||
|
|
||||||
# The content of the line where this location starts before this location.
|
# The content of the line where this location starts before this location.
|
||||||
def start_line_slice
|
def start_line_slice
|
||||||
offset = source.line_offset(start_offset)
|
offset = source.line_start(start_offset)
|
||||||
source.slice(offset, start_offset - offset)
|
source.slice(offset, start_offset - offset)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -148,12 +170,24 @@ module Prism
|
|||||||
source.column(start_offset)
|
source.column(start_offset)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# The column number in characters where this location starts from the start of
|
||||||
|
# the line.
|
||||||
|
def start_character_column
|
||||||
|
source.character_column(start_offset)
|
||||||
|
end
|
||||||
|
|
||||||
# The column number in bytes where this location ends from the start of the
|
# The column number in bytes where this location ends from the start of the
|
||||||
# line.
|
# line.
|
||||||
def end_column
|
def end_column
|
||||||
source.column(end_offset)
|
source.column(end_offset)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# The column number in characters where this location ends from the start of
|
||||||
|
# the line.
|
||||||
|
def end_character_column
|
||||||
|
source.character_column(end_offset)
|
||||||
|
end
|
||||||
|
|
||||||
# Implement the hash pattern matching interface for Location.
|
# Implement the hash pattern matching interface for Location.
|
||||||
def deconstruct_keys(keys)
|
def deconstruct_keys(keys)
|
||||||
{ start_offset: start_offset, end_offset: end_offset }
|
{ start_offset: start_offset, end_offset: end_offset }
|
||||||
|
@ -73,12 +73,9 @@ module Prism
|
|||||||
end
|
end
|
||||||
|
|
||||||
def load_encoding
|
def load_encoding
|
||||||
Encoding.find(io.read(load_varint))
|
@encoding = Encoding.find(io.read(load_varint))
|
||||||
end
|
|
||||||
|
|
||||||
def load_force_encoding
|
|
||||||
@encoding = load_encoding
|
|
||||||
@input = input.force_encoding(@encoding).freeze
|
@input = input.force_encoding(@encoding).freeze
|
||||||
|
@encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
def load_start_line
|
def load_start_line
|
||||||
@ -121,10 +118,7 @@ module Prism
|
|||||||
encoding = load_encoding
|
encoding = load_encoding
|
||||||
load_start_line
|
load_start_line
|
||||||
comments, magic_comments, errors, warnings = load_metadata
|
comments, magic_comments, errors, warnings = load_metadata
|
||||||
|
tokens.each { |token,| token.value.force_encoding(encoding) }
|
||||||
if encoding != @encoding
|
|
||||||
tokens.each { |token,| token.value.force_encoding(encoding) }
|
|
||||||
end
|
|
||||||
|
|
||||||
raise "Expected to consume all bytes while deserializing" unless @io.eof?
|
raise "Expected to consume all bytes while deserializing" unless @io.eof?
|
||||||
Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
|
Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
|
||||||
@ -132,7 +126,7 @@ module Prism
|
|||||||
|
|
||||||
def load_nodes
|
def load_nodes
|
||||||
load_header
|
load_header
|
||||||
load_force_encoding
|
load_encoding
|
||||||
load_start_line
|
load_start_line
|
||||||
|
|
||||||
comments, magic_comments, errors, warnings = load_metadata
|
comments, magic_comments, errors, warnings = load_metadata
|
||||||
|
@ -71,6 +71,38 @@ module Prism
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_location_character_offsets
|
||||||
|
program = Prism.parse("😀 + 😀\n😍 ||= 😍").value
|
||||||
|
|
||||||
|
# first 😀
|
||||||
|
location = program.statements.body.first.receiver.location
|
||||||
|
assert_equal 0, location.start_character_offset
|
||||||
|
assert_equal 1, location.end_character_offset
|
||||||
|
assert_equal 0, location.start_character_column
|
||||||
|
assert_equal 1, location.end_character_column
|
||||||
|
|
||||||
|
# second 😀
|
||||||
|
location = program.statements.body.first.arguments.arguments.first.location
|
||||||
|
assert_equal 4, location.start_character_offset
|
||||||
|
assert_equal 5, location.end_character_offset
|
||||||
|
assert_equal 4, location.start_character_column
|
||||||
|
assert_equal 5, location.end_character_column
|
||||||
|
|
||||||
|
# first 😍
|
||||||
|
location = program.statements.body.last.name_loc
|
||||||
|
assert_equal 6, location.start_character_offset
|
||||||
|
assert_equal 7, location.end_character_offset
|
||||||
|
assert_equal 0, location.start_character_column
|
||||||
|
assert_equal 1, location.end_character_column
|
||||||
|
|
||||||
|
# second 😍
|
||||||
|
location = program.statements.body.last.value.location
|
||||||
|
assert_equal 12, location.start_character_offset
|
||||||
|
assert_equal 13, location.end_character_offset
|
||||||
|
assert_equal 6, location.start_character_column
|
||||||
|
assert_equal 7, location.end_character_column
|
||||||
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def parse_expression(source)
|
def parse_expression(source)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user