YJIT: port call threshold logic from Rust to C for performance (#8628)
* Port call threshold logic from Rust to C for performance * Prefix global/field names with yjit_ * Fix linker error * Fix preprocessor condition for rb_yjit_threshold_hit * Fix third linker issue * Exclude yjit_calls_at_interv from RJIT bindgen --------- Co-authored-by: Takashi Kokubun <takashikkbn@gmail.com>
This commit is contained in:
parent
0c42c28531
commit
b2e1ddffa5
@ -637,7 +637,7 @@ generator = BindingGenerator.new(
|
||||
skip_fields: {
|
||||
'rb_execution_context_struct.machine': %w[regs], # differs between macOS and Linux
|
||||
rb_execution_context_struct: %w[method_missing_reason], # non-leading bit fields not supported
|
||||
rb_iseq_constant_body: %w[jit_exception jit_exception_calls yjit_payload], # conditionally defined
|
||||
rb_iseq_constant_body: %w[jit_exception jit_exception_calls yjit_payload yjit_calls_at_interv], # conditionally defined
|
||||
rb_thread_struct: %w[status has_dedicated_nt to_kill abort_on_exception report_on_exception pending_interrupt_queue_checked],
|
||||
:'' => %w[is_from_method is_lambda is_isolated], # rb_proc_t
|
||||
},
|
||||
|
51
vm.c
51
vm.c
@ -369,6 +369,49 @@ extern VALUE rb_vm_invoke_bmethod(rb_execution_context_t *ec, rb_proc_t *proc, V
|
||||
const rb_callable_method_entry_t *me);
|
||||
static VALUE vm_invoke_proc(rb_execution_context_t *ec, rb_proc_t *proc, VALUE self, int argc, const VALUE *argv, int kw_splat, VALUE block_handler);
|
||||
|
||||
#if USE_YJIT
|
||||
// Counter to serve as a proxy for execution time, total number of calls
|
||||
static uint64_t yjit_total_entry_hits = 0;
|
||||
|
||||
// Number of calls used to estimate how hot an ISEQ is
|
||||
#define YJIT_CALL_COUNT_INTERV 20u
|
||||
|
||||
/// Test whether we are ready to compile an ISEQ or not
|
||||
static inline bool
|
||||
rb_yjit_threshold_hit(const rb_iseq_t *iseq, uint64_t entry_calls)
|
||||
{
|
||||
yjit_total_entry_hits += 1;
|
||||
|
||||
// Record the number of calls at the beginning of the interval
|
||||
if (entry_calls + YJIT_CALL_COUNT_INTERV == rb_yjit_call_threshold) {
|
||||
iseq->body->yjit_calls_at_interv = yjit_total_entry_hits;
|
||||
}
|
||||
|
||||
// Try to estimate the total time taken (total number of calls) to reach 20 calls to this ISEQ
|
||||
// This gives us a ratio of how hot/cold this ISEQ is
|
||||
if (entry_calls == rb_yjit_call_threshold) {
|
||||
// We expect threshold 1 to compile everything immediately
|
||||
if (rb_yjit_call_threshold < YJIT_CALL_COUNT_INTERV) {
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t num_calls = yjit_total_entry_hits - iseq->body->yjit_calls_at_interv;
|
||||
|
||||
// Reject ISEQs that don't get called often enough
|
||||
if (num_calls > rb_yjit_cold_threshold) {
|
||||
rb_yjit_incr_counter("cold_iseq_entry");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
#define rb_yjit_threshold_hit(iseq, entry_calls) false
|
||||
#endif
|
||||
|
||||
#if USE_RJIT || USE_YJIT
|
||||
// Generate JIT code that supports the following kinds of ISEQ entries:
|
||||
// * The first ISEQ on vm_exec (e.g. <main>, or Ruby methods/blocks
|
||||
@ -396,10 +439,8 @@ jit_compile(rb_execution_context_t *ec)
|
||||
rb_yjit_compile_iseq(iseq, ec, false);
|
||||
}
|
||||
}
|
||||
else { // rb_rjit_call_p
|
||||
if (body->jit_entry_calls == rb_rjit_call_threshold()) {
|
||||
rb_rjit_compile(iseq);
|
||||
}
|
||||
else if (body->jit_entry_calls == rb_rjit_call_threshold()) {
|
||||
rb_rjit_compile(iseq);
|
||||
}
|
||||
}
|
||||
return body->jit_entry;
|
||||
@ -442,7 +483,7 @@ jit_compile_exception(rb_execution_context_t *ec)
|
||||
// Increment the ISEQ's call counter and trigger JIT compilation if not compiled
|
||||
if (body->jit_exception == NULL) {
|
||||
body->jit_exception_calls++;
|
||||
if (body->jit_exception_calls == rb_yjit_call_threshold()) {
|
||||
if (body->jit_exception_calls == rb_yjit_call_threshold) {
|
||||
rb_yjit_compile_iseq(iseq, ec, true);
|
||||
}
|
||||
}
|
||||
|
@ -524,6 +524,8 @@ struct rb_iseq_constant_body {
|
||||
#if USE_YJIT
|
||||
// YJIT stores some data on each iseq.
|
||||
void *yjit_payload;
|
||||
// Used to estimate how frequently this ISEQ gets called
|
||||
uint64_t yjit_calls_at_interv;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
8
yjit.h
8
yjit.h
@ -25,10 +25,11 @@
|
||||
#endif
|
||||
|
||||
// Expose these as declarations since we are building YJIT.
|
||||
extern uint64_t rb_yjit_call_threshold;
|
||||
extern uint64_t rb_yjit_cold_threshold;
|
||||
void rb_yjit_incr_counter(const char *counter_name);
|
||||
bool rb_yjit_enabled_p(void);
|
||||
bool rb_yjit_compile_new_iseqs(void);
|
||||
unsigned long rb_yjit_call_threshold(void);
|
||||
bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls);
|
||||
void rb_yjit_invalidate_all_method_lookup_assumptions(void);
|
||||
void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme);
|
||||
void rb_yjit_collect_binding_alloc(void);
|
||||
@ -49,10 +50,9 @@ void rb_yjit_show_usage(int help, int highlight, unsigned int width, int columns
|
||||
// !USE_YJIT
|
||||
// In these builds, YJIT could never be turned on. Provide dummy implementations.
|
||||
|
||||
static inline void rb_yjit_incr_counter(const char *counter_name) {}
|
||||
static inline bool rb_yjit_enabled_p(void) { return false; }
|
||||
static inline bool rb_yjit_compile_new_iseqs(void) { return false; }
|
||||
static inline unsigned long rb_yjit_call_threshold(void) { return 0; }
|
||||
static inline bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls) { return false; }
|
||||
static inline void rb_yjit_invalidate_all_method_lookup_assumptions(void) {}
|
||||
static inline void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme) {}
|
||||
static inline void rb_yjit_collect_binding_alloc(void) {}
|
||||
|
2
yjit.rb
2
yjit.rb
@ -317,7 +317,7 @@ module RubyVM::YJIT
|
||||
out.puts "bindings_set: " + format_number(13, stats[:binding_set])
|
||||
out.puts "compilation_failure: " + format_number(13, compilation_failure) if compilation_failure != 0
|
||||
out.puts "compiled_iseq_entry: " + format_number(13, stats[:compiled_iseq_entry])
|
||||
out.puts "cold_iseq_entry: " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry])
|
||||
out.puts "cold_iseq_entry: " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry] + stats[:cold_iseq_entry])
|
||||
out.puts "compiled_iseq_count: " + format_number(13, stats[:compiled_iseq_count])
|
||||
out.puts "compiled_blockid_count:" + format_number(13, stats[:compiled_blockid_count])
|
||||
out.puts "compiled_block_count: " + format_number(13, stats[:compiled_block_count])
|
||||
|
@ -976,7 +976,6 @@ impl fmt::Debug for MutableBranchList {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// This is all the data YJIT stores on an iseq
|
||||
/// This will be dynamically allocated by C code
|
||||
/// C code should pass an &mut IseqPayload to us
|
||||
@ -995,9 +994,6 @@ pub struct IseqPayload {
|
||||
// Blocks that are invalidated but are not yet deallocated.
|
||||
// The code GC will free them later.
|
||||
pub dead_blocks: Vec<BlockRef>,
|
||||
|
||||
// Used to estimate how frequently this ISEQ gets called
|
||||
pub call_count_at_interv: u64,
|
||||
}
|
||||
|
||||
impl IseqPayload {
|
||||
|
@ -2,6 +2,18 @@ use std::{ffi::{CStr, CString}, ptr::null};
|
||||
use crate::backend::current::TEMP_REGS;
|
||||
use std::os::raw::{c_char, c_int, c_uint};
|
||||
|
||||
// This option is exposed to the C side as a global variable for performance, see vm.c
|
||||
// Number of method calls after which to start generating code
|
||||
// Threshold==1 means compile on first execution
|
||||
#[no_mangle]
|
||||
static mut rb_yjit_call_threshold: u64 = 30;
|
||||
|
||||
// This option is exposed to the C side as a global variable for performance, see vm.c
|
||||
// Number of execution requests after which a method is no longer
|
||||
// considered hot. Raising this results in more generated code.
|
||||
#[no_mangle]
|
||||
static mut rb_yjit_cold_threshold: u64 = 200_000;
|
||||
|
||||
// Command-line options
|
||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||
#[repr(C)]
|
||||
@ -10,14 +22,6 @@ pub struct Options {
|
||||
// Note that the command line argument is expressed in MiB and not bytes
|
||||
pub exec_mem_size: usize,
|
||||
|
||||
// Number of method calls after which to start generating code
|
||||
// Threshold==1 means compile on first execution
|
||||
pub call_threshold: usize,
|
||||
|
||||
// Number of execution requests after which a method is no longer
|
||||
// considered hot. Raising this results in more generated code.
|
||||
pub cold_threshold: usize,
|
||||
|
||||
// Generate versions greedily until the limit is hit
|
||||
pub greedy_versioning: bool,
|
||||
|
||||
@ -63,8 +67,6 @@ pub struct Options {
|
||||
// Initialize the options to default values
|
||||
pub static mut OPTIONS: Options = Options {
|
||||
exec_mem_size: 128 * 1024 * 1024,
|
||||
call_threshold: 30,
|
||||
cold_threshold: 200_000,
|
||||
greedy_versioning: false,
|
||||
no_type_prop: false,
|
||||
max_versions: 4,
|
||||
@ -155,14 +157,14 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
|
||||
},
|
||||
|
||||
("call-threshold", _) => match opt_val.parse() {
|
||||
Ok(n) => unsafe { OPTIONS.call_threshold = n },
|
||||
Ok(n) => unsafe { rb_yjit_call_threshold = n },
|
||||
Err(_) => {
|
||||
return None;
|
||||
}
|
||||
},
|
||||
|
||||
("cold-threshold", _) => match opt_val.parse() {
|
||||
Ok(n) => unsafe { OPTIONS.cold_threshold = n },
|
||||
Ok(n) => unsafe { rb_yjit_cold_threshold = n },
|
||||
Err(_) => {
|
||||
return None;
|
||||
}
|
||||
|
@ -567,6 +567,21 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V
|
||||
}
|
||||
}
|
||||
|
||||
/// Increment a counter by name from the CRuby side
|
||||
/// Warning: this is not fast because it requires a hash lookup, so don't use in tight loops
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char) {
|
||||
use std::ffi::CStr;
|
||||
|
||||
if !get_option!(gen_stats) {
|
||||
return;
|
||||
}
|
||||
|
||||
let counter_name = unsafe { CStr::from_ptr(counter_name).to_str().unwrap() };
|
||||
let counter_ptr = get_counter_ptr(counter_name);
|
||||
unsafe { *counter_ptr += 1 };
|
||||
}
|
||||
|
||||
/// Export all YJIT statistics as a Ruby hash.
|
||||
fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
|
||||
// If YJIT is not enabled, return Qnil
|
||||
|
@ -46,57 +46,6 @@ pub fn yjit_enabled_p() -> bool {
|
||||
YJIT_ENABLED.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// Make the call threshold available to C
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rb_yjit_call_threshold() -> raw::c_ulong {
|
||||
get_option!(call_threshold) as raw::c_ulong
|
||||
}
|
||||
|
||||
// Counter to serve as a proxy for execution time, total number of calls
|
||||
static mut TOTAL_ENTRY_HITS: u64 = 0;
|
||||
|
||||
// Number of calls used to estimate how hot an ISEQ is
|
||||
static CALL_COUNT_INTERV: u64 = 20;
|
||||
|
||||
/// Test whether we are ready to compile an ISEQ or not
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rb_yjit_threshold_hit(iseq: IseqPtr, total_calls: u64) -> bool {
|
||||
|
||||
let call_threshold = get_option!(call_threshold) as u64;
|
||||
|
||||
unsafe { TOTAL_ENTRY_HITS += 1; }
|
||||
|
||||
// Record the number of calls at the beginning of the interval
|
||||
if total_calls + CALL_COUNT_INTERV == call_threshold {
|
||||
let payload = get_or_create_iseq_payload(iseq);
|
||||
let call_count = unsafe { TOTAL_ENTRY_HITS };
|
||||
payload.call_count_at_interv = call_count;
|
||||
}
|
||||
|
||||
// Try to estimate the total time taken (total number of calls) to reach 20 calls to this ISEQ
|
||||
// This gives us a ratio of how hot/cold this ISEQ is
|
||||
if total_calls == call_threshold {
|
||||
// We expect threshold 1 to compile everything immediately
|
||||
if call_threshold < CALL_COUNT_INTERV {
|
||||
return true;
|
||||
}
|
||||
|
||||
let payload = get_or_create_iseq_payload(iseq);
|
||||
let call_count = unsafe { TOTAL_ENTRY_HITS };
|
||||
let num_calls = call_count - payload.call_count_at_interv;
|
||||
|
||||
// Reject ISEQs that don't get called often enough
|
||||
if num_calls > get_option!(cold_threshold) as u64 {
|
||||
incr_counter!(cold_iseq_entry);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// This function is called from C code
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rb_yjit_init_rust() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user