From 6ad0f89d5aa2466cdfcd1a056ce9b28b39245033 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Fri, 1 Mar 2024 21:30:31 +0100 Subject: [PATCH] [ruby/prism] Use a more efficient StringIO on TruffleRuby * The stdlib StringIO is synchronized and this incurs a high overhead. * This is about twice as fast on TruffleRuby but surprisingly it is slower on JRuby. I am not sure why but probably @ivar access and integer arithmetic are much slower than Java field access/arithmetic on JRuby. * On CRuby interpreter it is slower, which is expected as the GVL already protects StringIO. * So we enable this only on TruffleRuby to not slow down other Rubies. * PRISM_FFI_BACKEND=true ruby -v -Ilib -rprism -rbenchmark -e '300.times { p Benchmark.realtime { Dir.glob("lib/**/*.rb") { |f| Prism.parse_file(f) } } }' ruby 3.3.0: 0.215 => 0.251 (cext: 0.062) ruby 3.3.0 YJIT: 0.118 => 0.113 (cext: 0.053) truffleruby JVM: 0.101 => 0.054 jruby 9.4.6.0: 0.162 => 0.219 jruby 9.4.6.0 indy: 0.078 => 0.086 * For the record here are the numbers for using the String directly, without a StringIO-like object: ruby 3.3.0: 0.215 => 0.234 (cext: 0.062) ruby 3.3.0 YJIT: 0.118 => 0.111 (cext: 0.053) truffleruby native: 0.101 => 0.053 jruby 9.4.6.0: 0.162 => 0.195 jruby 9.4.6.0 indy: 0.078 => 0.082 As we can see, that extra object adds a non-trivial overhead on CRuby interpreter and JRuby. But we need to make it possible to use StringIO and FastStringIO interchangeably. 
https://github.com/ruby/prism/commit/938677cbd2 --- prism/templates/lib/prism/serialize.rb.erb | 51 +++++++++++++++++----- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb index 4370363773..4f4efe7c10 100644 --- a/prism/templates/lib/prism/serialize.rb.erb +++ b/prism/templates/lib/prism/serialize.rb.erb @@ -44,6 +44,37 @@ module Prism end class Loader # :nodoc: + if RUBY_ENGINE == "truffleruby" + # StringIO is synchronized and that adds a high overhead on TruffleRuby. + class FastStringIO # :nodoc: + attr_accessor :pos + + def initialize(string) + @string = string + @pos = 0 + end + + def getbyte + byte = @string.getbyte(@pos) + @pos += 1 + byte + end + + def read(n) + slice = @string.byteslice(@pos, n) + @pos += n + slice + end + + def eof? + @pos >= @string.bytesize + end + end + else + FastStringIO = ::StringIO + end + private_constant :FastStringIO + attr_reader :encoding, :input, :serialized, :io attr_reader :constant_pool_offset, :constant_pool, :source attr_reader :start_line @@ -52,9 +83,9 @@ module Prism @encoding = Encoding::UTF_8 @input = source.source.dup + raise unless serialized.encoding == Encoding::BINARY @serialized = serialized - @io = StringIO.new(serialized) - @io.set_encoding(Encoding::BINARY) + @io = FastStringIO.new(serialized) @constant_pool_offset = nil @constant_pool = nil @@ -66,7 +97,7 @@ module Prism def load_header raise "Invalid serialization" if io.read(5) != "PRISM" raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION] - only_semantic_fields = io.read(1).unpack1("C") + only_semantic_fields = io.getbyte unless only_semantic_fields == 0 raise "Invalid serialization (location fields must be included but are not)" end @@ -137,7 +168,7 @@ module Prism comments, magic_comments, data_loc, errors, warnings = load_metadata - @constant_pool_offset = io.read(4).unpack1("L") + 
@constant_pool_offset = load_uint32 @constant_pool = Array.new(load_varuint, nil) [load_node, comments, magic_comments, data_loc, errors, warnings] @@ -186,7 +217,7 @@ module Prism io.read(8).unpack1("D") end - def load_serialized_length + def load_uint32 io.read(4).unpack1("L") end @@ -234,14 +265,14 @@ module Prism unless constant offset = constant_pool_offset + index * 8 - start = serialized.unpack1("L", offset: offset) - length = serialized.unpack1("L", offset: offset + 4) + start = @serialized.unpack1("L", offset: offset) + length = @serialized.unpack1("L", offset: offset + 4) constant = if start.nobits?(1 << 31) input.byteslice(start, length).force_encoding(@encoding).to_sym else - serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(@encoding).to_sym + @serialized.byteslice(start & ((1 << 31) - 1), length).force_encoding(@encoding).to_sym end constant_pool[index] = constant @@ -294,7 +325,7 @@ module Prism <%- nodes.each_with_index do |node, index| -%> when <%= index + 1 %> then <%- if node.needs_serialized_length? -%> - load_serialized_length + load_uint32 <%- end -%> <%= node.name %>.new( source, <%= (node.fields.map { |field| @@ -331,7 +362,7 @@ module Prism -> { location = load_location <%- if node.needs_serialized_length? -%> - load_serialized_length + load_uint32 <%- end -%> <%= node.name %>.new( source, <%= (node.fields.map { |field|