YJIT: add heuristic to avoid compiling cold ISEQs (#8522)

* YJIT: Add counter to measure how often we compile "cold" ISEQs (#535)

Fix counter name in DEFAULT_COUNTERS

YJIT: add --yjit-cold-threshold, don't compile cold ISEQs

YJIT: increase default cold threshold to 200_000

Remove rb_yjit_call_threshold()

Remove conflict markers

Fix compilation errors

Threshold 1 should compile immediately

Debug deadlock issue with test_ractor

Fix call threshold issue with tests

* Revert exception threshold logic. Document option in yjit.md

* (void) for 0 parameter functions in C99

* Rename iseq_entry_cold => cold_iseq_entry

* Document --yjit-cold-threshold in ruby.c

* Update doc/yjit/yjit.md

Co-authored-by: Jean byroot Boussier <jean.boussier+github@shopify.com>

* Shorten help string to appease test

* Address bug found by Kokubun. Reorder logic.

---------

Co-authored-by: Alan Wu <XrXr@users.noreply.github.com>
Co-authored-by: Jean byroot Boussier <jean.boussier+github@shopify.com>
This commit is contained in:
Maxime Chevalier-Boisvert 2023-10-03 17:45:46 -04:00 committed by GitHub
parent d47af93110
commit ea491802fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 73 additions and 5 deletions

View File

@ -165,9 +165,11 @@ YJIT supports all command-line options supported by upstream CRuby, but also add
- `--yjit`: enable YJIT (disabled by default)
- `--yjit-call-threshold=N`: number of calls after which YJIT begins to compile a function (default 30)
- `--yjit-cold-threshold=N`: number of global calls after which an ISEQ is considered cold and not
compiled; lower values mean less code is compiled (default 200000)
- `--yjit-exec-mem-size=N`: size of the executable memory block to allocate, in MiB (default 64 MiB in Ruby 3.2, 128 MiB in Ruby 3.3+)
- `--yjit-stats`: print statistics after the execution of a program (incurs a run-time cost)
- `--yjit-stats=quiet`: gather statistics while running a program but don't print them. Stats are accessible through `RubyVM::YJIT.runtime_stats`. (incurs a run-time cost)
- `--yjit-stats=quiet`: gather statistics while running a program but don't print them. Stats are accessible through `RubyVM::YJIT.runtime_stats`. (incurs a run-time cost)
- `--yjit-trace-exits`: produce a Marshal dump of backtraces from specific exits. Automatically enables `--yjit-stats` (must configure and build with `--enable-yjit=stats` to use this)
- `--yjit-max-versions=N`: maximum number of versions to generate per basic block (default 4)
- `--yjit-greedy-versioning`: greedy versioning mode (disabled by default, may increase code size)

1
ruby.c
View File

@ -374,6 +374,7 @@ usage(const char *name, int help, int highlight, int columns)
M("--yjit-trace-exits-sample-rate", "", "Trace exit locations only every Nth occurrence"),
M("--yjit-exec-mem-size=num", "", "Size of executable memory block in MiB (default: 128)"),
M("--yjit-call-threshold=num", "", "Number of calls to trigger JIT (default: 30)"),
M("--yjit-cold-threshold=num", "", "Global call after which ISEQs not compiled (default: 200K)"),
M("--yjit-max-versions=num", "", "Maximum number of versions per basic block (default: 4)"),
M("--yjit-greedy-versioning", "", "Greedy versioning mode (default: disabled)"),
};

3
vm.c
View File

@ -442,10 +442,11 @@ jit_compile_exception(rb_execution_context_t *ec)
// Increment the ISEQ's call counter and trigger JIT compilation if not compiled
if (body->jit_exception == NULL) {
body->jit_exception_calls++;
if (rb_yjit_threshold_hit(iseq, body->jit_exception_calls)) {
if (body->jit_exception_calls == rb_yjit_call_threshold()) {
rb_yjit_compile_iseq(iseq, ec, true);
}
}
return body->jit_exception;
}

2
yjit.h
View File

@ -27,6 +27,7 @@
// Expose these as declarations since we are building YJIT.
bool rb_yjit_enabled_p(void);
bool rb_yjit_compile_new_iseqs(void);
unsigned long rb_yjit_call_threshold(void);
bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls);
void rb_yjit_invalidate_all_method_lookup_assumptions(void);
void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme);
@ -49,6 +50,7 @@ void rb_yjit_tracing_invalidate_all(void);
static inline bool rb_yjit_enabled_p(void) { return false; }
static inline bool rb_yjit_compile_new_iseqs(void) { return false; }
static inline unsigned long rb_yjit_call_threshold(void) { return 0; }
static inline bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls) { return false; }
static inline void rb_yjit_invalidate_all_method_lookup_assumptions(void) {}
static inline void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme) {}

View File

@ -317,6 +317,7 @@ module RubyVM::YJIT
out.puts "bindings_set: " + format_number(13, stats[:binding_set])
out.puts "compilation_failure: " + format_number(13, compilation_failure) if compilation_failure != 0
out.puts "compiled_iseq_entry: " + format_number(13, stats[:compiled_iseq_entry])
out.puts "cold_iseq_entry: " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry])
out.puts "compiled_iseq_count: " + format_number(13, stats[:compiled_iseq_count])
out.puts "compiled_blockid_count:" + format_number(13, stats[:compiled_blockid_count])
out.puts "compiled_block_count: " + format_number(13, stats[:compiled_block_count])

View File

@ -986,6 +986,9 @@ pub struct IseqPayload {
// Blocks that are invalidated but are not yet deallocated.
// The code GC will free them later.
pub dead_blocks: Vec<BlockRef>,
// Used to estimate how frequently this ISEQ gets called
pub call_count_at_interv: u64,
}
impl IseqPayload {

View File

@ -13,6 +13,10 @@ pub struct Options {
// Threshold==1 means compile on first execution
pub call_threshold: usize,
// Number of execution requests after which a method is no longer
// considered hot. Raising this results in more generated code.
pub cold_threshold: usize,
// Generate versions greedily until the limit is hit
pub greedy_versioning: bool,
@ -59,6 +63,7 @@ pub struct Options {
pub static mut OPTIONS: Options = Options {
exec_mem_size: 128 * 1024 * 1024,
call_threshold: 30,
cold_threshold: 200_000,
greedy_versioning: false,
no_type_prop: false,
max_versions: 4,
@ -143,6 +148,13 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
},
("cold-threshold", _) => match opt_val.parse() {
Ok(n) => unsafe { OPTIONS.cold_threshold = n },
Err(_) => {
return None;
}
},
("max-versions", _) => match opt_val.parse() {
Ok(n) => unsafe { OPTIONS.max_versions = n },
Err(_) => {

View File

@ -198,9 +198,10 @@ macro_rules! make_counters {
/// The list of counters that are available without --yjit-stats.
/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
pub const DEFAULT_COUNTERS: [Counter; 7] = [
pub const DEFAULT_COUNTERS: [Counter; 8] = [
Counter::code_gc_count,
Counter::compiled_iseq_entry,
Counter::cold_iseq_entry,
Counter::compiled_iseq_count,
Counter::compiled_blockid_count,
Counter::compiled_block_count,
@ -441,6 +442,7 @@ make_counters! {
binding_set,
compiled_iseq_entry,
cold_iseq_entry,
compiled_iseq_count,
compiled_blockid_count,
compiled_block_count,

View File

@ -46,11 +46,55 @@ pub fn yjit_enabled_p() -> bool {
YJIT_ENABLED.load(Ordering::Acquire)
}
/// Make the call threshold available to C
#[no_mangle]
pub extern "C" fn rb_yjit_call_threshold() -> raw::c_ulong {
get_option!(call_threshold) as raw::c_ulong
}
// Counter to serve as a proxy for execution time, total number of calls
static mut TOTAL_ENTRY_HITS: u64 = 0;
// Number of calls used to estimate how hot an ISEQ is
static CALL_COUNT_INTERV: u64 = 20;
/// Test whether we are ready to compile an ISEQ or not
#[no_mangle]
pub extern "C" fn rb_yjit_threshold_hit(_iseq: IseqPtr, total_calls: u64) -> bool {
pub extern "C" fn rb_yjit_threshold_hit(iseq: IseqPtr, total_calls: u64) -> bool {
let call_threshold = get_option!(call_threshold) as u64;
return total_calls == call_threshold;
unsafe { TOTAL_ENTRY_HITS += 1; }
// Record the number of calls at the beginning of the interval
if total_calls + CALL_COUNT_INTERV == call_threshold {
let payload = get_or_create_iseq_payload(iseq);
let call_count = unsafe { TOTAL_ENTRY_HITS };
payload.call_count_at_interv = call_count;
}
// Try to estimate the total time taken (total number of calls) to reach 20 calls to this ISEQ
// This gives us a ratio of how hot/cold this ISEQ is
if total_calls == call_threshold {
// We expect threshold 1 to compile everything immediately
if call_threshold < CALL_COUNT_INTERV {
return true;
}
let payload = get_or_create_iseq_payload(iseq);
let call_count = unsafe { TOTAL_ENTRY_HITS };
let num_calls = call_count - payload.call_count_at_interv;
// Reject ISEQs that don't get called often enough
if num_calls > get_option!(cold_threshold) as u64 {
incr_counter!(cold_iseq_entry);
return false;
}
return true;
}
return false;
}
/// This function is called from C code