From 34715bdd910698e1aa9770b36c2b6e38e708c629 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 3 Aug 2024 00:53:13 +0000 Subject: [PATCH] Tune codegen for rb_yield() calls landing in ISeqs Unlike in older revisions in the year, GCC 11 isn't inlining the call to vm_push_frame() inside invoke_iseq_block_from_c() anymore. We do want it to be inlined since rb_yield() speed is fairly important. Logs from -fopt-info-optimized-inline reveal that GCC was blowing its code size budget inlining invoke_block_from_c_bh() into its various callers, leaving suboptimal code for its body. Take away some uses of the `inline` keyword and merge a common tail call to vm_exec() for overall better code. This tweak gives about 18% on a micro benchmark and 1% on the chunky-png benchmark from yjit-bench. I tested on a Skylake server. ``` $ cat c-to-ruby-call.yml benchmark: - 0.upto(10_000_000) {} $ benchmark-driver --chruby '+patch;master' c-to-ruby-call.yml Warming up -------------------------------------- 0.upto(10_000_000) {} 2.299 i/s - 3.000 times in 1.304689s (434.90ms/i) Calculating ------------------------------------- +patch master 0.upto(10_000_000) {} 2.299 1.943 i/s - 6.000 times in 2.609393s 3.088353s Comparison: 0.upto(10_000_000) {} +patch: 2.3 i/s master: 1.9 i/s - 1.18x slower $ ruby run_benchmarks.rb --chruby 'master;+patch' chunky-png ---------- ----------- ---------- ----------- ---------- -------------- ------------- bench master (ms) stddev (%) +patch (ms) stddev (%) +patch 1st itr master/+patch chunky-png 1156.1 0.1 1142.2 0.2 1.01 1.01 ---------- ----------- ---------- ----------- ---------- -------------- ------------- ``` --- vm.c | 17 +++++++---------- vm_insnhelper.c | 2 +- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/vm.c b/vm.c index b0ea81da31..17f34399b9 100644 --- a/vm.c +++ b/vm.c @@ -1509,7 +1509,7 @@ rb_binding_add_dynavars(VALUE bindval, rb_binding_t *bind, int dyncount, const I /* C -> Ruby: block */ -static inline VALUE +static inline void invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_cref_t *cref, VALUE type, int opt_pc) { int arg_size = ISEQ_BODY(iseq)->param.size; @@ -1521,15 +1521,13 @@ invoke_block(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, cons ec->cfp->sp + arg_size, ISEQ_BODY(iseq)->local_table_size - arg_size, ISEQ_BODY(iseq)->stack_max); - return vm_exec(ec); } -static VALUE +static inline void invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc) { /* bmethod call from outside the VM */ int arg_size = ISEQ_BODY(iseq)->param.size; - VALUE ret; VM_ASSERT(me->def->type == VM_METHOD_TYPE_BMETHOD); @@ -1542,9 +1540,6 @@ invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, co ISEQ_BODY(iseq)->stack_max); VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH); - ret = vm_exec(ec); - - return ret; } ALWAYS_INLINE(static VALUE @@ -1591,14 +1586,16 @@ invoke_iseq_block_from_c(rb_execution_context_t *ec, const struct rb_captured_bl cfp->sp = sp; if (me == NULL) { - return invoke_block(ec, iseq, self, captured, cref, type, opt_pc); + invoke_block(ec, iseq, self, captured, cref, type, opt_pc); } else { - return invoke_bmethod(ec, iseq, self, captured, me, type, opt_pc); + invoke_bmethod(ec, iseq, self, captured, me, type, opt_pc); } + + return vm_exec(ec); } -static inline VALUE +static VALUE invoke_block_from_c_bh(rb_execution_context_t *ec, VALUE block_handler, int argc, const VALUE *argv, int kw_splat, VALUE passed_block_handler, const rb_cref_t *cref, diff --git a/vm_insnhelper.c b/vm_insnhelper.c index 7c89b54dd7..4e4959a651 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -3751,7 +3751,7 @@ vm_method_cfunc_entry(const rb_callable_method_entry_t *me) return UNALIGNED_MEMBER_PTR(me->def, body.cfunc); } -static inline VALUE +static VALUE vm_call_cfunc_with_frame_(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, int argc, VALUE *argv, VALUE *stack_bottom) {