[ruby/prism] Fix parser translator during string escaping with invalid utf-8

Prefer `scan_byte` over `get_byte`, since `scan_byte` already returns the byte as an integer, sidestepping conversion issues.

Fixes https://github.com/ruby/prism/issues/3582

https://github.com/ruby/prism/commit/7f3008b2b5
This commit is contained in:
Earlopain 2025-06-11 15:28:21 +02:00 committed by git
parent 95201299fd
commit 970813d982
4 changed files with 23 additions and 3 deletions

View File

@ -0,0 +1,14 @@
# frozen_string_literal: true
require "strscan"
# Backport of StringScanner#scan_byte for Rubies that predate it (the method
# did not exist until Ruby 3.4). Nothing is installed when the method is
# already defined on StringScanner.
unless StringScanner.method_defined?(:scan_byte)
  StringScanner.include(
    Module.new do
      # Consumes the next byte and returns it as an Integer, or nil when
      # get_byte returns nil. The value is read with String#getbyte, which
      # works on raw bytes, so the string's encoding does not matter.
      def scan_byte # :nodoc:
        get_byte&.getbyte(0)
      end
    end
  )
end

View File

@ -88,6 +88,7 @@ Gem::Specification.new do |spec|
"lib/prism/pattern.rb",
"lib/prism/polyfill/append_as_bytes.rb",
"lib/prism/polyfill/byteindex.rb",
"lib/prism/polyfill/scan_byte.rb",
"lib/prism/polyfill/unpack1.rb",
"lib/prism/polyfill/warn.rb",
"lib/prism/reflection.rb",

View File

@ -3,6 +3,7 @@
require "strscan"
require_relative "../../polyfill/append_as_bytes"
require_relative "../../polyfill/scan_byte"
module Prism
module Translation
@ -762,12 +763,12 @@ module Prism
elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
# \M-x where x is an ASCII printable character
escape_read(result, scanner, control, true)
elsif (byte = scanner.get_byte)
elsif (byte = scanner.scan_byte)
# Something else after an escape.
if control && byte == "?"
if control && byte == 0x3f # ASCII '?'
result.append_as_bytes(escape_build(0x7f, false, meta))
else
result.append_as_bytes(escape_build(byte.ord, control, meta))
result.append_as_bytes(escape_build(byte, control, meta))
end
end
end

View File

@ -146,6 +146,10 @@ baz
%Q{abc}
%Q(\«)
%q(\«)
%^#$^#
%@#@#