YJIT: add heuristic to avoid compiling cold ISEQs (#8522)
* YJIT: Add counter to measure how often we compile "cold" ISEQs (#535) Fix counter name in DEFAULT_COUNTERS YJIT: add --yjit-cold-threshold, don't compile cold ISEQs YJIT: increase default cold threshold to 200_000 Remove rb_yjit_call_threshold() Remove conflict markers Fix compilation errors Threshold 1 should compile immediately Debug deadlock issue with test_ractor Fix call threshold issue with tests * Revert exception threshold logic. Document option in yjid.md * (void) for 0 parameter functions in C99 * Rename iseq_entry_cold => cold_iseq_entry * Document --yjit-cold-threshold in ruby.c * Update doc/yjit/yjit.md Co-authored-by: Jean byroot Boussier <jean.boussier+github@shopify.com> * Shorten help string to appease test * Address bug found by Kokubun. Reorder logic. --------- Co-authored-by: Alan Wu <XrXr@users.noreply.github.com> Co-authored-by: Jean byroot Boussier <jean.boussier+github@shopify.com>
This commit is contained in:
parent
d47af93110
commit
ea491802fa
@ -165,9 +165,11 @@ YJIT supports all command-line options supported by upstream CRuby, but also add
|
||||
|
||||
- `--yjit`: enable YJIT (disabled by default)
|
||||
- `--yjit-call-threshold=N`: number of calls after which YJIT begins to compile a function (default 30)
|
||||
- `--yjit-cold-threshold=N`: number of global calls after which an ISEQ is considered cold and not
|
||||
compiled; lower values mean less code is compiled (default 200000)
|
||||
- `--yjit-exec-mem-size=N`: size of the executable memory block to allocate, in MiB (default 64 MiB in Ruby 3.2, 128 MiB in Ruby 3.3+)
|
||||
- `--yjit-stats`: print statistics after the execution of a program (incurs a run-time cost)
|
||||
- `--yjit-stats=quiet`: gather statistics while running a program but don't print them. Stats are accessible through `RubyVM::YJIT.runtime_stats`. (incurs a run-time cost)
|
||||
- `--yjit-stats=quiet`: gather statistics while running a program but don't print them. Stats are accessible through `RubyVM::YJIT.runtime_stats`. (incurs a run-time cost)
|
||||
- `--yjit-trace-exits`: produce a Marshal dump of backtraces from specific exits. Automatically enables `--yjit-stats` (must configure and build with `--enable-yjit=stats` to use this)
|
||||
- `--yjit-max-versions=N`: maximum number of versions to generate per basic block (default 4)
|
||||
- `--yjit-greedy-versioning`: greedy versioning mode (disabled by default, may increase code size)
|
||||
|
1
ruby.c
1
ruby.c
@ -374,6 +374,7 @@ usage(const char *name, int help, int highlight, int columns)
|
||||
M("--yjit-trace-exits-sample-rate", "", "Trace exit locations only every Nth occurrence"),
|
||||
M("--yjit-exec-mem-size=num", "", "Size of executable memory block in MiB (default: 128)"),
|
||||
M("--yjit-call-threshold=num", "", "Number of calls to trigger JIT (default: 30)"),
|
||||
M("--yjit-cold-threshold=num", "", "Global call after which ISEQs not compiled (default: 200K)"),
|
||||
M("--yjit-max-versions=num", "", "Maximum number of versions per basic block (default: 4)"),
|
||||
M("--yjit-greedy-versioning", "", "Greedy versioning mode (default: disabled)"),
|
||||
};
|
||||
|
3
vm.c
3
vm.c
@ -442,10 +442,11 @@ jit_compile_exception(rb_execution_context_t *ec)
|
||||
// Increment the ISEQ's call counter and trigger JIT compilation if not compiled
|
||||
if (body->jit_exception == NULL) {
|
||||
body->jit_exception_calls++;
|
||||
if (rb_yjit_threshold_hit(iseq, body->jit_exception_calls)) {
|
||||
if (body->jit_exception_calls == rb_yjit_call_threshold()) {
|
||||
rb_yjit_compile_iseq(iseq, ec, true);
|
||||
}
|
||||
}
|
||||
|
||||
return body->jit_exception;
|
||||
}
|
||||
|
||||
|
2
yjit.h
2
yjit.h
@ -27,6 +27,7 @@
|
||||
// Expose these as declarations since we are building YJIT.
|
||||
bool rb_yjit_enabled_p(void);
|
||||
bool rb_yjit_compile_new_iseqs(void);
|
||||
unsigned long rb_yjit_call_threshold(void);
|
||||
bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls);
|
||||
void rb_yjit_invalidate_all_method_lookup_assumptions(void);
|
||||
void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme);
|
||||
@ -49,6 +50,7 @@ void rb_yjit_tracing_invalidate_all(void);
|
||||
|
||||
static inline bool rb_yjit_enabled_p(void) { return false; }
|
||||
static inline bool rb_yjit_compile_new_iseqs(void) { return false; }
|
||||
static inline unsigned long rb_yjit_call_threshold(void) { return 0; }
|
||||
static inline bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls) { return false; }
|
||||
static inline void rb_yjit_invalidate_all_method_lookup_assumptions(void) {}
|
||||
static inline void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme) {}
|
||||
|
1
yjit.rb
1
yjit.rb
@ -317,6 +317,7 @@ module RubyVM::YJIT
|
||||
out.puts "bindings_set: " + format_number(13, stats[:binding_set])
|
||||
out.puts "compilation_failure: " + format_number(13, compilation_failure) if compilation_failure != 0
|
||||
out.puts "compiled_iseq_entry: " + format_number(13, stats[:compiled_iseq_entry])
|
||||
out.puts "cold_iseq_entry: " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry])
|
||||
out.puts "compiled_iseq_count: " + format_number(13, stats[:compiled_iseq_count])
|
||||
out.puts "compiled_blockid_count:" + format_number(13, stats[:compiled_blockid_count])
|
||||
out.puts "compiled_block_count: " + format_number(13, stats[:compiled_block_count])
|
||||
|
@ -986,6 +986,9 @@ pub struct IseqPayload {
|
||||
// Blocks that are invalidated but are not yet deallocated.
|
||||
// The code GC will free them later.
|
||||
pub dead_blocks: Vec<BlockRef>,
|
||||
|
||||
// Used to estimate how frequently this ISEQ gets called
|
||||
pub call_count_at_interv: u64,
|
||||
}
|
||||
|
||||
impl IseqPayload {
|
||||
|
@ -13,6 +13,10 @@ pub struct Options {
|
||||
// Threshold==1 means compile on first execution
|
||||
pub call_threshold: usize,
|
||||
|
||||
// Number of execution requests after which a method is no longer
|
||||
// considered hot. Raising this results in more generated code.
|
||||
pub cold_threshold: usize,
|
||||
|
||||
// Generate versions greedily until the limit is hit
|
||||
pub greedy_versioning: bool,
|
||||
|
||||
@ -59,6 +63,7 @@ pub struct Options {
|
||||
pub static mut OPTIONS: Options = Options {
|
||||
exec_mem_size: 128 * 1024 * 1024,
|
||||
call_threshold: 30,
|
||||
cold_threshold: 200_000,
|
||||
greedy_versioning: false,
|
||||
no_type_prop: false,
|
||||
max_versions: 4,
|
||||
@ -143,6 +148,13 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
|
||||
}
|
||||
},
|
||||
|
||||
("cold-threshold", _) => match opt_val.parse() {
|
||||
Ok(n) => unsafe { OPTIONS.cold_threshold = n },
|
||||
Err(_) => {
|
||||
return None;
|
||||
}
|
||||
},
|
||||
|
||||
("max-versions", _) => match opt_val.parse() {
|
||||
Ok(n) => unsafe { OPTIONS.max_versions = n },
|
||||
Err(_) => {
|
||||
|
@ -198,9 +198,10 @@ macro_rules! make_counters {
|
||||
|
||||
/// The list of counters that are available without --yjit-stats.
|
||||
/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
|
||||
pub const DEFAULT_COUNTERS: [Counter; 7] = [
|
||||
pub const DEFAULT_COUNTERS: [Counter; 8] = [
|
||||
Counter::code_gc_count,
|
||||
Counter::compiled_iseq_entry,
|
||||
Counter::cold_iseq_entry,
|
||||
Counter::compiled_iseq_count,
|
||||
Counter::compiled_blockid_count,
|
||||
Counter::compiled_block_count,
|
||||
@ -441,6 +442,7 @@ make_counters! {
|
||||
binding_set,
|
||||
|
||||
compiled_iseq_entry,
|
||||
cold_iseq_entry,
|
||||
compiled_iseq_count,
|
||||
compiled_blockid_count,
|
||||
compiled_block_count,
|
||||
|
@ -46,11 +46,55 @@ pub fn yjit_enabled_p() -> bool {
|
||||
YJIT_ENABLED.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
/// Make the call threshold available to C
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rb_yjit_call_threshold() -> raw::c_ulong {
|
||||
get_option!(call_threshold) as raw::c_ulong
|
||||
}
|
||||
|
||||
// Total number of calls, used as a proxy for execution time
|
||||
static mut TOTAL_ENTRY_HITS: u64 = 0;
|
||||
|
||||
// Number of calls used to estimate how hot an ISEQ is
|
||||
static CALL_COUNT_INTERV: u64 = 20;
|
||||
|
||||
/// Test whether we are ready to compile an ISEQ or not
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rb_yjit_threshold_hit(_iseq: IseqPtr, total_calls: u64) -> bool {
|
||||
pub extern "C" fn rb_yjit_threshold_hit(iseq: IseqPtr, total_calls: u64) -> bool {
|
||||
|
||||
let call_threshold = get_option!(call_threshold) as u64;
|
||||
return total_calls == call_threshold;
|
||||
|
||||
unsafe { TOTAL_ENTRY_HITS += 1; }
|
||||
|
||||
// Record the number of calls at the beginning of the interval
|
||||
if total_calls + CALL_COUNT_INTERV == call_threshold {
|
||||
let payload = get_or_create_iseq_payload(iseq);
|
||||
let call_count = unsafe { TOTAL_ENTRY_HITS };
|
||||
payload.call_count_at_interv = call_count;
|
||||
}
|
||||
|
||||
// Try to estimate the total time taken (total number of calls) to reach 20 calls to this ISEQ
|
||||
// This gives us a ratio of how hot/cold this ISEQ is
|
||||
if total_calls == call_threshold {
|
||||
// We expect threshold 1 to compile everything immediately
|
||||
if call_threshold < CALL_COUNT_INTERV {
|
||||
return true;
|
||||
}
|
||||
|
||||
let payload = get_or_create_iseq_payload(iseq);
|
||||
let call_count = unsafe { TOTAL_ENTRY_HITS };
|
||||
let num_calls = call_count - payload.call_count_at_interv;
|
||||
|
||||
// Reject ISEQs that don't get called often enough
|
||||
if num_calls > get_option!(cold_threshold) as u64 {
|
||||
incr_counter!(cold_iseq_entry);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// This function is called from C code
|
||||
|
Loading…
x
Reference in New Issue
Block a user