diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index e1d3e0dca7..c910fd3aae 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -230,7 +230,7 @@ module Prism loader = Serialize::Loader.new(source, buffer.read) loader.load_header - loader.load_force_encoding + loader.load_encoding loader.load_start_line loader.load_comments end diff --git a/lib/prism/parse_result.rb b/lib/prism/parse_result.rb index 170a529bea..50c23bce65 100644 --- a/lib/prism/parse_result.rb +++ b/lib/prism/parse_result.rb @@ -25,40 +25,50 @@ module Prism # Perform a byteslice on the source code using the given byte offset and # byte length. - def slice(offset, length) - source.byteslice(offset, length) + def slice(byte_offset, length) + source.byteslice(byte_offset, length) end # Binary search through the offsets to find the line number for the given # byte offset. - def line(value) - start_line + find_line(value) + def line(byte_offset) + start_line + find_line(byte_offset) end # Return the byte offset of the start of the line corresponding to the given # byte offset. - def line_offset(value) - offsets[find_line(value)] + def line_start(byte_offset) + offsets[find_line(byte_offset)] end # Return the column number for the given byte offset. - def column(value) - value - offsets[find_line(value)] + def column(byte_offset) + byte_offset - line_start(byte_offset) + end + + # Return the character offset for the given byte offset. + def character_offset(byte_offset) + source.byteslice(0, byte_offset).length + end + + # Return the column number in characters for the given byte offset. + def character_column(byte_offset) + character_offset(byte_offset) - character_offset(line_start(byte_offset)) end private # Binary search through the offsets to find the line number for the given # byte offset. 
- def find_line(value) + def find_line(byte_offset) left = 0 right = offsets.length - 1 while left <= right mid = left + (right - left) / 2 - return mid if offsets[mid] == value + return mid if offsets[mid] == byte_offset - if offsets[mid] < value + if offsets[mid] < byte_offset left = mid + 1 else right = mid - 1 @@ -121,11 +131,23 @@ module Prism source.slice(start_offset, length) end + # The character offset from the beginning of the source where this location + # starts. + def start_character_offset + source.character_offset(start_offset) + end + # The byte offset from the beginning of the source where this location ends. def end_offset start_offset + length end + # The character offset from the beginning of the source where this location + # ends. + def end_character_offset + source.character_offset(end_offset) + end + # The line number where this location starts. def start_line source.line(start_offset) @@ -133,7 +155,7 @@ module Prism # The content of the line where this location starts before this location. def start_line_slice - offset = source.line_offset(start_offset) + offset = source.line_start(start_offset) source.slice(offset, start_offset - offset) end @@ -148,12 +170,24 @@ module Prism source.column(start_offset) end + # The column number in characters where this location starts from the start of + # the line. + def start_character_column + source.character_column(start_offset) + end + # The column number in bytes where this location ends from the start of the # line. def end_column source.column(end_offset) end + # The column number in characters where this location ends from the start of + # the line. + def end_character_column + source.character_column(end_offset) + end + # Implement the hash pattern matching interface for Location. 
def deconstruct_keys(keys) { start_offset: start_offset, end_offset: end_offset } diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index 2837504543..e5a88ae99a 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -73,12 +73,9 @@ module Prism end def load_encoding - Encoding.find(io.read(load_varint)) - end - - def load_force_encoding - @encoding = load_encoding + @encoding = Encoding.find(io.read(load_varint)) @input = input.force_encoding(@encoding).freeze + @encoding end def load_start_line @@ -121,10 +118,7 @@ module Prism encoding = load_encoding load_start_line comments, magic_comments, errors, warnings = load_metadata - - if encoding != @encoding - tokens.each { |token,| token.value.force_encoding(encoding) } - end + tokens.each { |token,| token.value.force_encoding(encoding) } raise "Expected to consume all bytes while deserializing" unless @io.eof? Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source) @@ -132,7 +126,7 @@ module Prism def load_nodes load_header - load_force_encoding + load_encoding load_start_line comments, magic_comments, errors, warnings = load_metadata diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb index a61282cca1..cd87a81395 100644 --- a/test/prism/ruby_api_test.rb +++ b/test/prism/ruby_api_test.rb @@ -71,6 +71,38 @@ module Prism end end + def test_location_character_offsets + program = Prism.parse("😀 + 😀\n😍 ||= 😍").value + + # first 😀 + location = program.statements.body.first.receiver.location + assert_equal 0, location.start_character_offset + assert_equal 1, location.end_character_offset + assert_equal 0, location.start_character_column + assert_equal 1, location.end_character_column + + # second 😀 + location = program.statements.body.first.arguments.arguments.first.location + assert_equal 4, location.start_character_offset + assert_equal 5, 
location.end_character_offset + assert_equal 4, location.start_character_column + assert_equal 5, location.end_character_column + + # first 😍 + location = program.statements.body.last.name_loc + assert_equal 6, location.start_character_offset + assert_equal 7, location.end_character_offset + assert_equal 0, location.start_character_column + assert_equal 1, location.end_character_column + + # second 😍 + location = program.statements.body.last.value.location + assert_equal 12, location.start_character_offset + assert_equal 13, location.end_character_offset + assert_equal 6, location.start_character_column + assert_equal 7, location.end_character_column + end + private def parse_expression(source)