YJIT: Add --yjit-mem-size option (#11810)

* YJIT: Add --yjit-mem-size option

* Improve --help

* s/the region/this virtual memory region/

Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>

---------

Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
This commit is contained in:
Takashi Kokubun 2024-10-07 10:07:23 -07:00 committed by GitHub
parent 8d1906e8b7
commit 35711903f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
Notes: git 2024-10-07 17:07:40 +00:00
Merged-By: maximecb <maximecb@ruby-lang.org>
6 changed files with 62 additions and 21 deletions

View File

@ -166,7 +166,8 @@ The machine code generated for a given method can be printed by adding `puts Rub
YJIT supports all command-line options supported by upstream CRuby, but also adds a few YJIT-specific options: YJIT supports all command-line options supported by upstream CRuby, but also adds a few YJIT-specific options:
- `--yjit`: enable YJIT (disabled by default) - `--yjit`: enable YJIT (disabled by default)
- `--yjit-exec-mem-size=N`: size of the executable memory block to allocate, in MiB (default 48 MiB) - `--yjit-mem-size=N`: soft limit on YJIT memory usage in MiB (default: 128). Tries to limit `code_region_size + yjit_alloc_size`
- `--yjit-exec-mem-size=N`: hard limit on executable memory block in MiB. Limits `code_region_size`
- `--yjit-call-threshold=N`: number of calls after which YJIT begins to compile a function. - `--yjit-call-threshold=N`: number of calls after which YJIT begins to compile a function.
It defaults to 30, and it's then increased to 120 when the number of ISEQs in the process reaches 40,000. It defaults to 30, and it's then increased to 120 when the number of ISEQs in the process reaches 40,000.
- `--yjit-cold-threshold=N`: number of global calls after which an ISEQ is considered cold and not - `--yjit-cold-threshold=N`: number of global calls after which an ISEQ is considered cold and not

View File

@ -686,7 +686,7 @@ impl CodeBlock {
let alloc = TestingAllocator::new(mem_size); let alloc = TestingAllocator::new(mem_size);
let mem_start: *const u8 = alloc.mem_start(); let mem_start: *const u8 = alloc.mem_start();
let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size); let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024);
Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None), true) Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(None), true)
} }
@ -704,7 +704,7 @@ impl CodeBlock {
let alloc = TestingAllocator::new(mem_size); let alloc = TestingAllocator::new(mem_size);
let mem_start: *const u8 = alloc.mem_start(); let mem_start: *const u8 = alloc.mem_start();
let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size); let virt_mem = VirtualMem::new(alloc, 1, NonNull::new(mem_start as *mut u8).unwrap(), mem_size, 128 * 1024 * 1024);
Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)), true) Self::new(Rc::new(RefCell::new(virt_mem)), false, Rc::new(Some(freed_pages)), true)
} }

View File

@ -10504,11 +10504,11 @@ impl CodegenGlobals {
/// Initialize the codegen globals /// Initialize the codegen globals
pub fn init() { pub fn init() {
// Executable memory and code page size in bytes // Executable memory and code page size in bytes
let mem_size = get_option!(exec_mem_size); let exec_mem_size = get_option!(exec_mem_size).unwrap_or(get_option!(mem_size));
#[cfg(not(test))] #[cfg(not(test))]
let (mut cb, mut ocb) = { let (mut cb, mut ocb) = {
let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) }; let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(exec_mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming // Memory protection syscalls need page-aligned addresses, so check it here. Assuming
// `virt_block` is page-aligned, `second_half` should be page-aligned as long as the // `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
@ -10530,7 +10530,8 @@ impl CodegenGlobals {
SystemAllocator {}, SystemAllocator {},
page_size, page_size,
NonNull::new(virt_block).unwrap(), NonNull::new(virt_block).unwrap(),
mem_size, exec_mem_size,
get_option!(mem_size),
); );
let mem_block = Rc::new(RefCell::new(mem_block)); let mem_block = Rc::new(RefCell::new(mem_block));
@ -10546,9 +10547,9 @@ impl CodegenGlobals {
// In test mode we're not linking with the C code // In test mode we're not linking with the C code
// so we don't allocate executable memory // so we don't allocate executable memory
#[cfg(test)] #[cfg(test)]
let mut cb = CodeBlock::new_dummy(mem_size / 2); let mut cb = CodeBlock::new_dummy(exec_mem_size / 2);
#[cfg(test)] #[cfg(test)]
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2)); let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(exec_mem_size / 2));
let ocb_start_addr = ocb.unwrap().get_write_ptr(); let ocb_start_addr = ocb.unwrap().get_write_ptr();
let leave_exit_code = gen_leave_exit(&mut ocb).unwrap(); let leave_exit_code = gen_leave_exit(&mut ocb).unwrap();

View File

@ -27,9 +27,14 @@ pub static mut rb_yjit_cold_threshold: u64 = 200_000;
#[derive(Debug)] #[derive(Debug)]
#[repr(C)] #[repr(C)]
pub struct Options { pub struct Options {
// Size of the executable memory block to allocate in bytes /// Soft limit of all memory used by YJIT in bytes
// Note that the command line argument is expressed in MiB and not bytes /// VirtualMem avoids allocating new pages if code_region_size + yjit_alloc_size
pub exec_mem_size: usize, /// is larger than this threshold. Rust may still allocate memory beyond this limit.
pub mem_size: usize,
/// Hard limit of the executable memory block to allocate in bytes
/// Note that the command line argument is expressed in MiB and not bytes
pub exec_mem_size: Option<usize>,
// Disable the propagation of type information // Disable the propagation of type information
pub no_type_prop: bool, pub no_type_prop: bool,
@ -81,7 +86,8 @@ pub struct Options {
// Initialize the options to default values // Initialize the options to default values
pub static mut OPTIONS: Options = Options { pub static mut OPTIONS: Options = Options {
exec_mem_size: 48 * 1024 * 1024, mem_size: 128 * 1024 * 1024,
exec_mem_size: None,
no_type_prop: false, no_type_prop: false,
max_versions: 4, max_versions: 4,
num_temp_regs: 5, num_temp_regs: 5,
@ -100,8 +106,10 @@ pub static mut OPTIONS: Options = Options {
}; };
/// YJIT option descriptions for `ruby --help`. /// YJIT option descriptions for `ruby --help`.
static YJIT_OPTIONS: [(&str, &str); 9] = [ /// Note that --help allows only 80 characters per line, including indentation. 80-character limit --> |
("--yjit-exec-mem-size=num", "Size of executable memory block in MiB (default: 48)."), pub const YJIT_OPTIONS: &'static [(&str, &str)] = &[
("--yjit-mem-size=num", "Soft limit on YJIT memory usage in MiB (default: 128)."),
("--yjit-exec-mem-size=num", "Hard limit on executable memory block in MiB."),
("--yjit-call-threshold=num", "Number of calls to trigger JIT."), ("--yjit-call-threshold=num", "Number of calls to trigger JIT."),
("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)."), ("--yjit-cold-threshold=num", "Global calls after which ISEQs not compiled (default: 200K)."),
("--yjit-stats", "Enable collecting YJIT statistics."), ("--yjit-stats", "Enable collecting YJIT statistics."),
@ -183,6 +191,20 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
match (opt_name, opt_val) { match (opt_name, opt_val) {
("", "") => (), // Simply --yjit ("", "") => (), // Simply --yjit
("mem-size", _) => match opt_val.parse::<usize>() {
Ok(n) => {
if n == 0 || n > 2 * 1024 * 1024 {
return None
}
// Convert from MiB to bytes internally for convenience
unsafe { OPTIONS.mem_size = n * 1024 * 1024 }
}
Err(_) => {
return None;
}
},
("exec-mem-size", _) => match opt_val.parse::<usize>() { ("exec-mem-size", _) => match opt_val.parse::<usize>() {
Ok(n) => { Ok(n) => {
if n == 0 || n > 2 * 1024 * 1024 { if n == 0 || n > 2 * 1024 * 1024 {
@ -190,7 +212,7 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
} }
// Convert from MiB to bytes internally for convenience // Convert from MiB to bytes internally for convenience
unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 } unsafe { OPTIONS.exec_mem_size = Some(n * 1024 * 1024) }
} }
Err(_) => { Err(_) => {
return None; return None;

View File

@ -56,6 +56,11 @@ unsafe impl GlobalAlloc for StatsAlloc {
} }
} }
/// The number of bytes YJIT has allocated on the Rust heap.
///
/// Returns the current value of the `alloc_size` counter kept by
/// `GLOBAL_ALLOCATOR`, read with `Ordering::SeqCst`. This is the same value
/// that is reported as the `yjit_alloc_size` stat.
pub fn yjit_alloc_size() -> usize {
GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)
}
/// Mapping of C function / ISEQ name to integer indices /// Mapping of C function / ISEQ name to integer indices
/// This is accessed at compilation time only (protected by a lock) /// This is accessed at compilation time only (protected by a lock)
static mut CFUNC_NAME_TO_IDX: Option<HashMap<String, usize>> = None; static mut CFUNC_NAME_TO_IDX: Option<HashMap<String, usize>> = None;
@ -770,7 +775,7 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
set_stat_usize!(hash, "code_region_size", cb.mapped_region_size()); set_stat_usize!(hash, "code_region_size", cb.mapped_region_size());
// Rust global allocations in bytes // Rust global allocations in bytes
set_stat_usize!(hash, "yjit_alloc_size", GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)); set_stat_usize!(hash, "yjit_alloc_size", yjit_alloc_size());
// How many bytes we are using to store context data // How many bytes we are using to store context data
let context_data = CodegenGlobals::get_context_data(); let context_data = CodegenGlobals::get_context_data();

View File

@ -5,7 +5,7 @@
use std::ptr::NonNull; use std::ptr::NonNull;
use crate::{utils::IntoUsize, backend::ir::Target}; use crate::{backend::ir::Target, stats::yjit_alloc_size, utils::IntoUsize};
#[cfg(not(test))] #[cfg(not(test))]
pub type VirtualMem = VirtualMemory<sys::SystemAllocator>; pub type VirtualMem = VirtualMemory<sys::SystemAllocator>;
@ -26,9 +26,12 @@ pub struct VirtualMemory<A: Allocator> {
/// Location of the virtual memory region. /// Location of the virtual memory region.
region_start: NonNull<u8>, region_start: NonNull<u8>,
/// Size of the region in bytes. /// Size of this virtual memory region in bytes.
region_size_bytes: usize, region_size_bytes: usize,
/// mapped_region_bytes + yjit_alloc_size may not increase beyond this limit.
memory_limit_bytes: usize,
/// Number of bytes per "page", memory protection permission can only be controlled at this /// Number of bytes per "page", memory protection permission can only be controlled at this
/// granularity. /// granularity.
page_size_bytes: usize, page_size_bytes: usize,
@ -106,13 +109,20 @@ use WriteError::*;
impl<A: Allocator> VirtualMemory<A> { impl<A: Allocator> VirtualMemory<A> {
/// Bring a part of the address space under management. /// Bring a part of the address space under management.
pub fn new(allocator: A, page_size: u32, virt_region_start: NonNull<u8>, size_bytes: usize) -> Self { pub fn new(
allocator: A,
page_size: u32,
virt_region_start: NonNull<u8>,
region_size_bytes: usize,
memory_limit_bytes: usize,
) -> Self {
assert_ne!(0, page_size); assert_ne!(0, page_size);
let page_size_bytes = page_size.as_usize(); let page_size_bytes = page_size.as_usize();
Self { Self {
region_start: virt_region_start, region_start: virt_region_start,
region_size_bytes: size_bytes, region_size_bytes,
memory_limit_bytes,
page_size_bytes, page_size_bytes,
mapped_region_bytes: 0, mapped_region_bytes: 0,
current_write_page: None, current_write_page: None,
@ -176,7 +186,8 @@ impl<A: Allocator> VirtualMemory<A> {
} }
self.current_write_page = Some(page_addr); self.current_write_page = Some(page_addr);
} else if (start..whole_region_end).contains(&raw) { } else if (start..whole_region_end).contains(&raw) &&
(page_addr + page_size - start as usize) + yjit_alloc_size() < self.memory_limit_bytes {
// Writing to a brand new page // Writing to a brand new page
let mapped_region_end_addr = mapped_region_end as usize; let mapped_region_end_addr = mapped_region_end as usize;
let alloc_size = page_addr - mapped_region_end_addr + page_size; let alloc_size = page_addr - mapped_region_end_addr + page_size;
@ -368,6 +379,7 @@ pub mod tests {
PAGE_SIZE.try_into().unwrap(), PAGE_SIZE.try_into().unwrap(),
NonNull::new(mem_start as *mut u8).unwrap(), NonNull::new(mem_start as *mut u8).unwrap(),
mem_size, mem_size,
128 * 1024 * 1024,
) )
} }