YJIT: port call threshold logic from Rust to C for performance (#8628)

* Port call threshold logic from Rust to C for performance

* Prefix global/field names with yjit_

* Fix linker error

* Fix preprocessor condition for rb_yjit_threshold_hit

* Fix third linker issue

* Exclude yjit_calls_at_interv from RJIT bindgen

---------

Co-authored-by: Takashi Kokubun <takashikkbn@gmail.com>
This commit is contained in:
Maxime Chevalier-Boisvert 2023-10-12 10:05:34 -04:00 committed by GitHub
parent 0c42c28531
commit b2e1ddffa5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 83 additions and 78 deletions

View File

@ -637,7 +637,7 @@ generator = BindingGenerator.new(
skip_fields: {
'rb_execution_context_struct.machine': %w[regs], # differs between macOS and Linux
rb_execution_context_struct: %w[method_missing_reason], # non-leading bit fields not supported
rb_iseq_constant_body: %w[jit_exception jit_exception_calls yjit_payload], # conditionally defined
rb_iseq_constant_body: %w[jit_exception jit_exception_calls yjit_payload yjit_calls_at_interv], # conditionally defined
rb_thread_struct: %w[status has_dedicated_nt to_kill abort_on_exception report_on_exception pending_interrupt_queue_checked],
:'' => %w[is_from_method is_lambda is_isolated], # rb_proc_t
},

51
vm.c
View File

@ -369,6 +369,49 @@ extern VALUE rb_vm_invoke_bmethod(rb_execution_context_t *ec, rb_proc_t *proc, V
const rb_callable_method_entry_t *me);
static VALUE vm_invoke_proc(rb_execution_context_t *ec, rb_proc_t *proc, VALUE self, int argc, const VALUE *argv, int kw_splat, VALUE block_handler);
#if USE_YJIT
// Counter to serve as a proxy for execution time, total number of calls
static uint64_t yjit_total_entry_hits = 0;
// Number of calls used to estimate how hot an ISEQ is
#define YJIT_CALL_COUNT_INTERV 20u
/// Decide whether an ISEQ is ready to be compiled on this entry.
/// Every invocation bumps the global yjit_total_entry_hits counter.
/// Returns true only on the call where entry_calls equals rb_yjit_call_threshold
/// and the ISEQ is not judged cold; returns false on every other call.
static inline bool
rb_yjit_threshold_hit(const rb_iseq_t *iseq, uint64_t entry_calls)
{
    yjit_total_entry_hits += 1;

    // When the ISEQ is exactly YJIT_CALL_COUNT_INTERV calls short of the
    // threshold, remember the global counter as the start of the interval
    if (entry_calls + YJIT_CALL_COUNT_INTERV == rb_yjit_call_threshold) {
        iseq->body->yjit_calls_at_interv = yjit_total_entry_hits;
    }

    // Only the call that lands exactly on the threshold can trigger compilation
    if (entry_calls != rb_yjit_call_threshold) {
        return false;
    }

    // Thresholds shorter than the interval (e.g. threshold 1) never record an
    // interval start, so they compile immediately
    if (rb_yjit_call_threshold < YJIT_CALL_COUNT_INTERV) {
        return true;
    }

    // Total number of entry calls (a proxy for elapsed time) that it took this
    // ISEQ to go through its last YJIT_CALL_COUNT_INTERV calls.
    // This gives us a ratio of how hot/cold this ISEQ is
    uint64_t num_calls = yjit_total_entry_hits - iseq->body->yjit_calls_at_interv;

    // Hot enough: called often relative to everything else
    if (num_calls <= rb_yjit_cold_threshold) {
        return true;
    }

    // Reject ISEQs that don't get called often enough
    rb_yjit_incr_counter("cold_iseq_entry");
    return false;
}
#else
#define rb_yjit_threshold_hit(iseq, entry_calls) false
#endif
#if USE_RJIT || USE_YJIT
// Generate JIT code that supports the following kinds of ISEQ entries:
// * The first ISEQ on vm_exec (e.g. <main>, or Ruby methods/blocks
@ -396,10 +439,8 @@ jit_compile(rb_execution_context_t *ec)
rb_yjit_compile_iseq(iseq, ec, false);
}
}
else { // rb_rjit_call_p
if (body->jit_entry_calls == rb_rjit_call_threshold()) {
rb_rjit_compile(iseq);
}
else if (body->jit_entry_calls == rb_rjit_call_threshold()) {
rb_rjit_compile(iseq);
}
}
return body->jit_entry;
@ -442,7 +483,7 @@ jit_compile_exception(rb_execution_context_t *ec)
// Increment the ISEQ's call counter and trigger JIT compilation if not compiled
if (body->jit_exception == NULL) {
body->jit_exception_calls++;
if (body->jit_exception_calls == rb_yjit_call_threshold()) {
if (body->jit_exception_calls == rb_yjit_call_threshold) {
rb_yjit_compile_iseq(iseq, ec, true);
}
}

View File

@ -524,6 +524,8 @@ struct rb_iseq_constant_body {
#if USE_YJIT
// YJIT stores some data on each iseq.
void *yjit_payload;
// Used to estimate how frequently this ISEQ gets called
uint64_t yjit_calls_at_interv;
#endif
};

8
yjit.h
View File

@ -25,10 +25,11 @@
#endif
// Expose these as declarations since we are building YJIT.
extern uint64_t rb_yjit_call_threshold;
extern uint64_t rb_yjit_cold_threshold;
void rb_yjit_incr_counter(const char *counter_name);
bool rb_yjit_enabled_p(void);
bool rb_yjit_compile_new_iseqs(void);
unsigned long rb_yjit_call_threshold(void);
bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls);
void rb_yjit_invalidate_all_method_lookup_assumptions(void);
void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme);
void rb_yjit_collect_binding_alloc(void);
@ -49,10 +50,9 @@ void rb_yjit_show_usage(int help, int highlight, unsigned int width, int columns
// !USE_YJIT
// In these builds, YJIT could never be turned on. Provide dummy implementations.
static inline void rb_yjit_incr_counter(const char *counter_name) {}
static inline bool rb_yjit_enabled_p(void) { return false; }
static inline bool rb_yjit_compile_new_iseqs(void) { return false; }
static inline unsigned long rb_yjit_call_threshold(void) { return 0; }
static inline bool rb_yjit_threshold_hit(const rb_iseq_t *const iseq, unsigned long total_calls) { return false; }
static inline void rb_yjit_invalidate_all_method_lookup_assumptions(void) {}
static inline void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme) {}
static inline void rb_yjit_collect_binding_alloc(void) {}

View File

@ -317,7 +317,7 @@ module RubyVM::YJIT
out.puts "bindings_set: " + format_number(13, stats[:binding_set])
out.puts "compilation_failure: " + format_number(13, compilation_failure) if compilation_failure != 0
out.puts "compiled_iseq_entry: " + format_number(13, stats[:compiled_iseq_entry])
out.puts "cold_iseq_entry: " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry])
out.puts "cold_iseq_entry: " + format_number_pct(13, stats[:cold_iseq_entry], stats[:compiled_iseq_entry] + stats[:cold_iseq_entry])
out.puts "compiled_iseq_count: " + format_number(13, stats[:compiled_iseq_count])
out.puts "compiled_blockid_count:" + format_number(13, stats[:compiled_blockid_count])
out.puts "compiled_block_count: " + format_number(13, stats[:compiled_block_count])

View File

@ -976,7 +976,6 @@ impl fmt::Debug for MutableBranchList {
}
}
/// This is all the data YJIT stores on an iseq
/// This will be dynamically allocated by C code
/// C code should pass an &mut IseqPayload to us
@ -995,9 +994,6 @@ pub struct IseqPayload {
// Blocks that are invalidated but are not yet deallocated.
// The code GC will free them later.
pub dead_blocks: Vec<BlockRef>,
// Used to estimate how frequently this ISEQ gets called
pub call_count_at_interv: u64,
}
impl IseqPayload {

View File

@ -2,6 +2,18 @@ use std::{ffi::{CStr, CString}, ptr::null};
use crate::backend::current::TEMP_REGS;
use std::os::raw::{c_char, c_int, c_uint};
// This option is exposed to the C side as a global variable for performance, see vm.c
// Number of method calls after which to start generating code
// Threshold==1 means compile on first execution
#[no_mangle]
static mut rb_yjit_call_threshold: u64 = 30;
// This option is exposed to the C side as a global variable for performance, see vm.c
// Number of execution requests after which a method is no longer
// considered hot. Raising this results in more generated code.
#[no_mangle]
static mut rb_yjit_cold_threshold: u64 = 200_000;
// Command-line options
#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
@ -10,14 +22,6 @@ pub struct Options {
// Note that the command line argument is expressed in MiB and not bytes
pub exec_mem_size: usize,
// Number of method calls after which to start generating code
// Threshold==1 means compile on first execution
pub call_threshold: usize,
// Number of execution requests after which a method is no longer
// considered hot. Raising this results in more generated code.
pub cold_threshold: usize,
// Generate versions greedily until the limit is hit
pub greedy_versioning: bool,
@ -63,8 +67,6 @@ pub struct Options {
// Initialize the options to default values
pub static mut OPTIONS: Options = Options {
exec_mem_size: 128 * 1024 * 1024,
call_threshold: 30,
cold_threshold: 200_000,
greedy_versioning: false,
no_type_prop: false,
max_versions: 4,
@ -155,14 +157,14 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
},
("call-threshold", _) => match opt_val.parse() {
Ok(n) => unsafe { OPTIONS.call_threshold = n },
Ok(n) => unsafe { rb_yjit_call_threshold = n },
Err(_) => {
return None;
}
},
("cold-threshold", _) => match opt_val.parse() {
Ok(n) => unsafe { OPTIONS.cold_threshold = n },
Ok(n) => unsafe { rb_yjit_cold_threshold = n },
Err(_) => {
return None;
}

View File

@ -567,6 +567,21 @@ pub extern "C" fn rb_yjit_get_exit_locations(_ec: EcPtr, _ruby_self: VALUE) -> V
}
}
/// Bump a YJIT counter, looked up by name, on behalf of the CRuby side.
/// Does nothing unless the `gen_stats` option is on.
/// Warning: this is not fast because it requires a hash lookup, so don't use in tight loops
///
/// Panics if `counter_name` is not valid UTF-8.
#[no_mangle]
pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char) {
    use std::ffi::CStr;

    if get_option!(gen_stats) {
        // SAFETY: relies on the C caller passing a valid, NUL-terminated string
        let name = unsafe { CStr::from_ptr(counter_name) }.to_str().unwrap();
        let slot = get_counter_ptr(name);
        // SAFETY: relies on get_counter_ptr returning a pointer that is valid to write through
        unsafe { *slot += 1 };
    }
}
/// Export all YJIT statistics as a Ruby hash.
fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
// If YJIT is not enabled, return Qnil

View File

@ -46,57 +46,6 @@ pub fn yjit_enabled_p() -> bool {
YJIT_ENABLED.load(Ordering::Acquire)
}
/// Accessor exported to C: returns the current `call_threshold` option
/// converted to a C `unsigned long`.
#[no_mangle]
pub extern "C" fn rb_yjit_call_threshold() -> raw::c_ulong {
    let threshold = get_option!(call_threshold);
    threshold as raw::c_ulong
}
// Counter to serve as a proxy for execution time, total number of calls
static mut TOTAL_ENTRY_HITS: u64 = 0;
// Number of calls used to estimate how hot an ISEQ is
static CALL_COUNT_INTERV: u64 = 20;
/// Decide whether an ISEQ is ready to be compiled on this call.
/// Every invocation bumps the global `TOTAL_ENTRY_HITS` counter.
/// Returns true only on the call where `total_calls` equals the call threshold
/// and the ISEQ is not judged cold; returns false on every other call.
#[no_mangle]
pub extern "C" fn rb_yjit_threshold_hit(iseq: IseqPtr, total_calls: u64) -> bool {
    let call_threshold = get_option!(call_threshold) as u64;

    // Count this entry and keep the updated total for the checks below
    let entry_hits = unsafe {
        TOTAL_ENTRY_HITS += 1;
        TOTAL_ENTRY_HITS
    };

    // When the ISEQ is exactly CALL_COUNT_INTERV calls short of the threshold,
    // remember the global counter as the start of the interval
    if total_calls + CALL_COUNT_INTERV == call_threshold {
        get_or_create_iseq_payload(iseq).call_count_at_interv = entry_hits;
    }

    // Only the call that lands exactly on the threshold can trigger compilation
    if total_calls != call_threshold {
        return false;
    }

    // Thresholds shorter than the interval (e.g. threshold 1) never record an
    // interval start, so they compile immediately
    if call_threshold < CALL_COUNT_INTERV {
        return true;
    }

    // Total number of entry calls (a proxy for elapsed time) that it took this
    // ISEQ to go through its last CALL_COUNT_INTERV calls.
    // This gives us a ratio of how hot/cold this ISEQ is
    let elapsed_calls = entry_hits - get_or_create_iseq_payload(iseq).call_count_at_interv;

    // Reject ISEQs that don't get called often enough
    if elapsed_calls > get_option!(cold_threshold) as u64 {
        incr_counter!(cold_iseq_entry);
        return false;
    }

    true
}
/// This function is called from C code
#[no_mangle]
pub extern "C" fn rb_yjit_init_rust() {