[ruby/prism] Avoid breaking code units offset on binary encoding
https://github.com/ruby/prism/commit/25a4cf6794

Co-authored-by: Kevin Newton <kddnewton@users.noreply.github.com>
This commit is contained in:
parent
615a087216
commit
e50754fcfa
@@ -90,7 +90,7 @@ module Prism
|
||||
# concept of code units that differs from the number of characters in other
|
||||
# encodings, it is not captured here.
|
||||
def code_units_offset(byte_offset, encoding)
|
||||
- byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
|
||||
+ byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
|
||||
|
||||
if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
|
||||
byteslice.bytesize / 2
|
||||
|
@@ -140,6 +140,25 @@ module Prism
|
||||
assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE)
|
||||
end
|
||||
|
||||
def test_code_units_handles_binary_encoding_with_multibyte_characters
|
||||
# If the encoding is set to binary and the source contains multibyte
|
||||
# characters, we avoid breaking the code unit offsets, but they will
|
||||
# still be incorrect.
|
||||
|
||||
program = Prism.parse(<<~RUBY).value
|
||||
# -*- encoding: binary -*-
|
||||
|
||||
😀 + 😀
|
||||
RUBY
|
||||
|
||||
# first 😀
|
||||
location = program.statements.body.first.receiver.location
|
||||
|
||||
assert_equal 4, location.end_code_units_column(Encoding::UTF_8)
|
||||
assert_equal 4, location.end_code_units_column(Encoding::UTF_16LE)
|
||||
assert_equal 4, location.end_code_units_column(Encoding::UTF_32LE)
|
||||
end
|
||||
|
||||
def test_chop
|
||||
location = Prism.parse("foo").value.location
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user