`rb_vm_insns_count` is a global variable used for reporting YJIT statistics. It is a counter that tallies the number of interpreter instructions that have been executed; this way we can approximate how much time we're spending in YJIT compared to the interpreter. Unfortunately, keeping this statistic means that every instruction executed in the interpreter loop must increment the counter. Normally this isn't a problem, but in multi-threaded situations (when Ractors are used), incrementing this counter can become quite costly due to page caching issues. Additionally, since there is no locking when incrementing this global, the count can't really make sense in a multi-threaded environment. This commit changes `rb_vm_insns_count` to a thread local. That way each Ractor has its own copy of the counter and incrementing the counter becomes quite cheap. Of course this means that in multi-threaded situations, the value doesn't really make sense (but it didn't make sense before because of the lack of locking). The counter is used for YJIT statistics, and since YJIT is basically disabled when Ractors are in use, I don't think we care about inaccuracies (for the time being). We can revisit this counter when we give YJIT multi-threading support, but for the time being this commit restores multi-threaded performance. To test this, I used the benchmark in [Bug #20489]. Here is the performance on Ruby 3.2: ``` $ time RUBY_MAX_CPU=12 ./miniruby -v ../test.rb 8 8 ruby 3.2.0 (2022-12-25 revision a528908271) [x86_64-linux] [0...1, 1...2, 2...3, 3...4, 4...5, 5...6, 6...7, 7...8] ../test.rb:43: warning: Ractor is experimental, and the behavior may change in future versions of Ruby! Also there are many implementation issues. 
________________________________________________________ Executed in 2.53 secs fish external usr time 19.86 secs 370.00 micros 19.86 secs sys time 0.02 secs 320.00 micros 0.02 secs ``` We can see the regression in performance on the master branch: ``` $ time RUBY_MAX_CPU=12 ./miniruby -v ../test.rb 8 8 ruby 3.5.0dev (2025-01-10T16:22:26Z master 4a2702dafb) +PRISM [x86_64-linux] [0...1, 1...2, 2...3, 3...4, 4...5, 5...6, 6...7, 7...8] ../test.rb:43: warning: Ractor is experimental, and the behavior may change in future versions of Ruby! Also there are many implementation issues. ________________________________________________________ Executed in 24.87 secs fish external usr time 195.55 secs 0.00 micros 195.55 secs sys time 0.00 secs 716.00 micros 0.00 secs ``` Here are the stats after this commit: ``` $ time RUBY_MAX_CPU=12 ./miniruby -v ../test.rb 8 8 ruby 3.5.0dev (2025-01-10T20:37:06Z tl 3ef0432779) +PRISM [x86_64-linux] [0...1, 1...2, 2...3, 3...4, 4...5, 5...6, 6...7, 7...8] ../test.rb:43: warning: Ractor is experimental, and the behavior may change in future versions of Ruby! Also there are many implementation issues. ________________________________________________________ Executed in 2.46 secs fish external usr time 19.34 secs 381.00 micros 19.34 secs sys time 0.01 secs 321.00 micros 0.01 secs ``` [Bug #20489]
155 lines
3.5 KiB
C
155 lines
3.5 KiB
C
/* -*-c-*- */
|
|
/**********************************************************************
|
|
|
|
vm_exec.c -
|
|
|
|
$Author$
|
|
|
|
Copyright (C) 2004-2007 Koichi Sasada
|
|
|
|
**********************************************************************/
|
|
|
|
#include <math.h>
|
|
|
|
#if USE_YJIT || USE_RJIT
/*
 * Tally of the instructions executed on vm_exec_core; --yjit-stats reads it.
 * The definition carries RB_THREAD_LOCAL_SPECIFIER, so the counter is kept
 * per thread rather than shared, and the value seen by one thread does not
 * include instructions executed by the others.
 */
RB_THREAD_LOCAL_SPECIFIER uint64_t rb_vm_insns_count = 0;
#endif
|
|
|
|
#if VM_COLLECT_USAGE_DETAILS
/*
 * Forward declaration, present only when VM_COLLECT_USAGE_DETAILS is enabled.
 * Takes an instruction number; the definition is not part of this file section.
 */
static void vm_analysis_insn(int insn);
#endif
|
|
|
|
/*
 * DECL_SC_REG(type, r, reg) declares a local `register type reg_<r>`.
 *
 * With a GCC-compatible compiler on one of the targets below, and only when
 * VMDEBUG is not positive, the variable is also bound to a named machine
 * register: the per-target prefix ("r", "e" or "x") is pasted in front of the
 * string literal `reg`.  Everywhere else the declaration carries no binding.
 */
#if VMDEBUG > 0 || !defined(__GNUC__)
/* Debug builds, or no __GNUC__: plain register variable. */
#define DECL_SC_REG(type, r, reg) register type reg_##r

#elif defined(__x86_64__)
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("r" reg)

#elif defined(__i386__)
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("e" reg)

#elif defined(__powerpc64__) || defined(__POWERPC__)
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("r" reg)

#elif defined(__aarch64__)
#define DECL_SC_REG(type, r, reg) register type reg_##r __asm__("x" reg)

#else
/* GCC-compatible compiler on a target without a dedicated mapping. */
#define DECL_SC_REG(type, r, reg) register type reg_##r
#endif
|
|
/* #define DECL_SC_REG(r, reg) VALUE reg_##r */
|
|
|
|
#if !OPT_CALL_THREADED_CODE
|
|
static VALUE
|
|
vm_exec_core(rb_execution_context_t *ec)
|
|
{
|
|
#if defined(__GNUC__) && defined(__i386__)
|
|
DECL_SC_REG(const VALUE *, pc, "di");
|
|
DECL_SC_REG(rb_control_frame_t *, cfp, "si");
|
|
#define USE_MACHINE_REGS 1
|
|
|
|
#elif defined(__GNUC__) && defined(__x86_64__)
|
|
DECL_SC_REG(const VALUE *, pc, "14");
|
|
DECL_SC_REG(rb_control_frame_t *, cfp, "15");
|
|
#define USE_MACHINE_REGS 1
|
|
|
|
#elif defined(__GNUC__) && (defined(__powerpc64__) || defined(__POWERPC__))
|
|
DECL_SC_REG(const VALUE *, pc, "14");
|
|
DECL_SC_REG(rb_control_frame_t *, cfp, "15");
|
|
#define USE_MACHINE_REGS 1
|
|
|
|
#elif defined(__GNUC__) && defined(__aarch64__)
|
|
DECL_SC_REG(const VALUE *, pc, "19");
|
|
DECL_SC_REG(rb_control_frame_t *, cfp, "20");
|
|
#define USE_MACHINE_REGS 1
|
|
|
|
#else
|
|
register rb_control_frame_t *reg_cfp;
|
|
const VALUE *reg_pc;
|
|
#define USE_MACHINE_REGS 0
|
|
|
|
#endif
|
|
|
|
#if USE_MACHINE_REGS
|
|
|
|
#undef RESTORE_REGS
|
|
#define RESTORE_REGS() \
|
|
{ \
|
|
VM_REG_CFP = ec->cfp; \
|
|
reg_pc = reg_cfp->pc; \
|
|
}
|
|
|
|
#undef VM_REG_PC
|
|
#define VM_REG_PC reg_pc
|
|
#undef GET_PC
|
|
#define GET_PC() (reg_pc)
|
|
#undef SET_PC
|
|
#define SET_PC(x) (reg_cfp->pc = VM_REG_PC = (x))
|
|
#endif
|
|
|
|
#if OPT_TOKEN_THREADED_CODE || OPT_DIRECT_THREADED_CODE
|
|
#include "vmtc.inc"
|
|
if (UNLIKELY(ec == 0)) {
|
|
return (VALUE)insns_address_table;
|
|
}
|
|
#endif
|
|
reg_cfp = ec->cfp;
|
|
reg_pc = reg_cfp->pc;
|
|
|
|
first:
|
|
INSN_DISPATCH();
|
|
/*****************/
|
|
#include "vm.inc"
|
|
/*****************/
|
|
END_INSNS_DISPATCH();
|
|
|
|
/* unreachable */
|
|
rb_bug("vm_eval: unreachable");
|
|
goto first;
|
|
}
|
|
|
|
const void **
|
|
rb_vm_get_insns_address_table(void)
|
|
{
|
|
return (const void **)vm_exec_core(0);
|
|
}
|
|
|
|
#else /* OPT_CALL_THREADED_CODE */
|
|
|
|
#include "vm.inc"
|
|
#include "vmtc.inc"
|
|
|
|
const void **
|
|
rb_vm_get_insns_address_table(void)
|
|
{
|
|
return (const void **)insns_address_table;
|
|
}
|
|
|
|
static VALUE
|
|
vm_exec_core(rb_execution_context_t *ec)
|
|
{
|
|
register rb_control_frame_t *reg_cfp = ec->cfp;
|
|
rb_thread_t *th;
|
|
|
|
while (1) {
|
|
reg_cfp = ((rb_insn_func_t) (*GET_PC()))(ec, reg_cfp);
|
|
|
|
if (UNLIKELY(reg_cfp == 0)) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!UNDEF_P((th = rb_ec_thread_ptr(ec))->retval)) {
|
|
VALUE ret = th->retval;
|
|
th->retval = Qundef;
|
|
return ret;
|
|
}
|
|
else {
|
|
VALUE err = ec->errinfo;
|
|
ec->errinfo = Qnil;
|
|
return err;
|
|
}
|
|
}
|
|
#endif
|