[ruby/prism] Use a more efficient StringIO on TruffleRuby
* The stdlib StringIO is synchronized and this incurs a high overhead. * This is about twice as fast on TruffleRuby but surprisingly it is slower on JRuby. I am not sure why but probably @ivar access and integer arithmetic are much slower than Java field access/arithmetic on JRuby. * On the CRuby interpreter it is slower, which is expected as the GVL already protects StringIO. * So we enable this only on TruffleRuby to not slow down other Rubies. * PRISM_FFI_BACKEND=true ruby -v -Ilib -rprism -rbenchmark -e '300.times { p Benchmark.realtime { Dir.glob("lib/**/*.rb") { |f| Prism.parse_file(f) } } }' ruby 3.3.0: 0.215 => 0.251 (cext: 0.062) ruby 3.3.0 YJIT: 0.118 => 0.113 (cext: 0.053) truffleruby JVM: 0.101 => 0.054 jruby 9.4.6.0: 0.162 => 0.219 jruby 9.4.6.0 indy: 0.078 => 0.086 * For the record here are the numbers for using the String directly, without a StringIO-like object: ruby 3.3.0: 0.215 => 0.234 (cext: 0.062) ruby 3.3.0 YJIT: 0.118 => 0.111 (cext: 0.053) truffleruby native: 0.101 => 0.053 jruby 9.4.6.0: 0.162 => 0.195 jruby 9.4.6.0 indy: 0.078 => 0.082 As we can see, that extra object adds a non-trivial overhead on the CRuby interpreter and JRuby. But we need to make it possible to use StringIO and FastStringIO interchangeably. https://github.com/ruby/prism/commit/938677cbd2
This commit is contained in:
parent
03a73fdc3d
commit
6ad0f89d5a
@ -44,6 +44,37 @@ module Prism
|
||||
end
|
||||
|
||||
class Loader # :nodoc:
|
||||
if RUBY_ENGINE == "truffleruby"
|
||||
# StringIO is synchronized and that adds a high overhead on TruffleRuby.
|
||||
# Minimal, unsynchronized reader over a String, exposing only the subset of
# the StringIO interface used here: #pos, #pos=, #getbyte, #read(n), #eof?.
#
# End-of-data behaviour follows ::StringIO so the two can be swapped freely:
# reads never move the position past the end of the string, and both
# #getbyte and #read (with a positive length) return nil once the data is
# exhausted.
class FastStringIO # :nodoc:
  # Current byte offset into the underlying string.
  attr_accessor :pos

  # string - the String to read from; it is referenced, not copied.
  def initialize(string)
    @string = string
    @pos = 0
  end

  # Returns the byte at the current position as an Integer and advances by
  # one, or returns nil (leaving the position untouched) at end of data.
  def getbyte
    byte = @string.getbyte(@pos)
    # Only advance when a byte was actually consumed, so #pos never
    # overshoots the end of the string.
    @pos += 1 if byte
    byte
  end

  # Returns up to n bytes starting at the current position and advances by
  # the number of bytes actually returned. Returns nil when a positive
  # number of bytes is requested but none are left.
  def read(n)
    slice = @string.byteslice(@pos, n)
    # byteslice yields "" exactly at the end and nil beyond it; normalise
    # both to nil so every read at end of data looks the same.
    return nil if slice.nil? || (n > 0 && slice.empty?)

    # Advance by what was read rather than what was asked for, so a short
    # read leaves the position at the end of the string.
    @pos += slice.bytesize
    slice
  end

  # True once the position has reached (or been set past) the end.
  def eof?
    @pos >= @string.bytesize
  end
end
|
||||
else
|
||||
FastStringIO = ::StringIO
|
||||
end
|
||||
private_constant :FastStringIO
|
||||
|
||||
attr_reader :encoding, :input, :serialized, :io
|
||||
attr_reader :constant_pool_offset, :constant_pool, :source
|
||||
attr_reader :start_line
|
||||
@ -52,9 +83,9 @@ module Prism
|
||||
@encoding = Encoding::UTF_8
|
||||
|
||||
@input = source.source.dup
|
||||
raise unless serialized.encoding == Encoding::BINARY
|
||||
@serialized = serialized
|
||||
@io = StringIO.new(serialized)
|
||||
@io.set_encoding(Encoding::BINARY)
|
||||
@io = FastStringIO.new(serialized)
|
||||
|
||||
@constant_pool_offset = nil
|
||||
@constant_pool = nil
|
||||
@ -66,7 +97,7 @@ module Prism
|
||||
def load_header
|
||||
raise "Invalid serialization" if io.read(5) != "PRISM"
|
||||
raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
|
||||
only_semantic_fields = io.read(1).unpack1("C")
|
||||
only_semantic_fields = io.getbyte
|
||||
unless only_semantic_fields == 0
|
||||
raise "Invalid serialization (location fields must be included but are not)"
|
||||
end
|
||||
@ -137,7 +168,7 @@ module Prism
|
||||
|
||||
comments, magic_comments, data_loc, errors, warnings = load_metadata
|
||||
|
||||
@constant_pool_offset = io.read(4).unpack1("L")
|
||||
@constant_pool_offset = load_uint32
|
||||
@constant_pool = Array.new(load_varuint, nil)
|
||||
|
||||
[load_node, comments, magic_comments, data_loc, errors, warnings]
|
||||
@ -186,7 +217,7 @@ module Prism
|
||||
io.read(8).unpack1("D")
|
||||
end
|
||||
|
||||
def load_serialized_length
|
||||
def load_uint32
|
||||
io.read(4).unpack1("L")
|
||||
end
|
||||
|
||||
@ -234,14 +265,14 @@ module Prism
|
||||
|
||||
unless constant
|
||||
offset = constant_pool_offset + index * 8
|
||||
start = serialized.unpack1("L", offset: offset)
|
||||
length = serialized.unpack1("L", offset: offset + 4)
|
||||
start = @serialized.unpack1("L", offset: offset)
|
||||
length = @serialized.unpack1("L", offset: offset + 4)
|
||||
|
||||
constant =
|
||||
if start.nobits?(1 << 31)
|
||||
input.byteslice(start, length).force_encoding(@encoding).to_sym
|
||||
else
|
||||
serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(@encoding).to_sym
|
||||
@serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(@encoding).to_sym
|
||||
end
|
||||
|
||||
constant_pool[index] = constant
|
||||
@ -294,7 +325,7 @@ module Prism
|
||||
<%- nodes.each_with_index do |node, index| -%>
|
||||
when <%= index + 1 %> then
|
||||
<%- if node.needs_serialized_length? -%>
|
||||
load_serialized_length
|
||||
load_uint32
|
||||
<%- end -%>
|
||||
<%= node.name %>.new(
|
||||
source, <%= (node.fields.map { |field|
|
||||
@ -331,7 +362,7 @@ module Prism
|
||||
-> {
|
||||
location = load_location
|
||||
<%- if node.needs_serialized_length? -%>
|
||||
load_serialized_length
|
||||
load_uint32
|
||||
<%- end -%>
|
||||
<%= node.name %>.new(
|
||||
source, <%= (node.fields.map { |field|
|
||||
|
Loading…
x
Reference in New Issue
Block a user