YJIT: Interleave inline and outlined code blocks (#6460)

Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
Takashi Kokubun 2022-10-17 10:45:59 -07:00 committed by GitHub
parent e7c71c6c92
commit 64c52c4282
Notes: git 2022-10-17 17:46:19 +00:00
Merged-By: k0kubun <takashikkbn@gmail.com>
9 changed files with 379 additions and 162 deletions
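For orientation (an editorial sketch, not part of the diff): the core change is that the inline and outlined CodeBlocks no longer own two separate halves of the executable region. Both now point at one VirtualMem behind Rc<RefCell<...>>, and the region is divided into fixed-size code pages whose first half holds inline code and whose second half holds outlined code, presumably to keep a block and its outlined stubs close together in memory. A minimal, runnable sketch of just the sharing pattern, with stub types standing in for VirtualMem and CodeBlock:

```rust
// Editorial sketch only: two writers over one shared memory region.
use std::cell::RefCell;
use std::rc::Rc;

struct VirtualMemStub {
    bytes: Vec<u8>, // stand-in for the real VirtualMem mapping
}

struct CodeBlockStub {
    mem_block: Rc<RefCell<VirtualMemStub>>, // shared; mutated through borrow_mut()
    write_pos: usize,
}

impl CodeBlockStub {
    fn write_byte(&mut self, byte: u8) {
        let pos = self.write_pos;
        self.mem_block.borrow_mut().bytes[pos] = byte;
        self.write_pos += 1;
    }
}

fn main() {
    // One region, two writers: the inline block starts at offset 0, the
    // outlined block starts at the second half of this (tiny) "page".
    let mem = Rc::new(RefCell::new(VirtualMemStub { bytes: vec![0; 32] }));
    let mut cb = CodeBlockStub { mem_block: Rc::clone(&mem), write_pos: 0 };
    let mut ocb = CodeBlockStub { mem_block: Rc::clone(&mem), write_pos: 16 };
    cb.write_byte(0x90);
    ocb.write_byte(0xcc);
    assert_eq!(mem.borrow().bytes[0], 0x90);
    assert_eq!(mem.borrow().bytes[16], 0xcc);
}
```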


@ -1,9 +1,20 @@
use std::cell::RefCell;
use std::cmp;
use std::fmt;
use std::mem;
use std::rc::Rc;
#[cfg(target_arch = "x86_64")]
use crate::backend::x86_64::JMP_PTR_BYTES;
#[cfg(target_arch = "aarch64")]
use crate::backend::arm64::JMP_PTR_BYTES;
use crate::backend::ir::Assembler;
use crate::backend::ir::Target;
use crate::virtualmem::WriteError;
#[cfg(feature = "asm_comments")]
use std::collections::BTreeMap;
use crate::codegen::CodegenGlobals;
use crate::virtualmem::{VirtualMem, CodePtr};
// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
@ -17,7 +28,8 @@ pub mod arm64;
//
/// Reference to an ASM label
struct LabelRef {
#[derive(Clone)]
pub struct LabelRef {
// Position in the code block where the label reference exists
pos: usize,
@ -36,7 +48,7 @@ struct LabelRef {
/// Block of memory into which instructions can be assembled
pub struct CodeBlock {
// Memory for storing the encoded instructions
mem_block: VirtualMem,
mem_block: Rc<RefCell<VirtualMem>>,
// Memory block size
mem_size: usize,
@ -44,6 +56,12 @@ pub struct CodeBlock {
// Current writing position
write_pos: usize,
// Size of a code page (inlined + outlined)
page_size: usize,
// Size reserved for writing a jump to the next page
page_end_reserve: usize,
// Table of registered label addresses
label_addrs: Vec<usize>,
@ -58,7 +76,6 @@ pub struct CodeBlock {
asm_comments: BTreeMap<usize, Vec<String>>,
// True for OutlinedCb
#[cfg(feature = "disasm")]
pub outlined: bool,
// Set if the CodeBlock is unable to output some instructions,
@ -67,27 +84,158 @@ pub struct CodeBlock {
dropped_bytes: bool,
}
/// Set of CodeBlock label states. Used for recovering the previous state.
pub struct LabelState {
label_addrs: Vec<usize>,
label_names: Vec<String>,
label_refs: Vec<LabelRef>,
}
impl CodeBlock {
/// Make a new CodeBlock
pub fn new(mem_block: VirtualMem, outlined: bool) -> Self {
Self {
mem_size: mem_block.virtual_region_size(),
pub fn new(mem_block: Rc<RefCell<VirtualMem>>, page_size: usize, outlined: bool) -> Self {
let mem_size = mem_block.borrow().virtual_region_size();
let mut cb = Self {
mem_block,
mem_size,
write_pos: 0,
page_size,
page_end_reserve: JMP_PTR_BYTES,
label_addrs: Vec::new(),
label_names: Vec::new(),
label_refs: Vec::new(),
#[cfg(feature = "asm_comments")]
asm_comments: BTreeMap::new(),
#[cfg(feature = "disasm")]
outlined,
dropped_bytes: false,
};
cb.write_pos = cb.page_start();
cb
}
/// Move the CodeBlock to the next page. If it's on the furthest page,
/// move the other CodeBlock to the next page as well.
pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
let old_write_ptr = self.get_write_ptr();
self.set_write_ptr(base_ptr);
self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
// Move self to the next page
let next_page_idx = self.write_pos / self.page_size + 1;
if !self.set_page(next_page_idx, &jmp_ptr) {
self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
return false;
}
// Move the other CodeBlock to the same page if it's on the furthest page
self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);
return !self.dropped_bytes;
}
/// Move the CodeBlock to page_idx only if it's not going backwards.
fn set_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, page_idx: usize, jmp_ptr: &F) -> bool {
// Do not move the CodeBlock if page_idx points to an old position so that this
// CodeBlock will not overwrite existing code.
//
// Let's say this is the current situation:
// cb: [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)]
//
// When cb needs to patch page1, this will be temporarily changed to:
// cb: [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)]
//
// While patching page1, cb may need to jump to page2. What set_page currently does is:
// cb: [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)]
// instead of:
// cb: [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3]
// because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's
// write_pos point to existing code in page2, which might let ocb overwrite it later.
//
// We could remember the last write_pos in page2 and let set_page use that position,
// but you need to waste some space for keeping write_pos for every single page.
// It doesn't seem necessary for performance either. So we're currently not doing it.
let mut dst_pos = self.page_size * page_idx + self.page_start();
if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
// Reset dropped_bytes
self.dropped_bytes = false;
// Convert dst_pos to dst_ptr
let src_pos = self.write_pos;
self.write_pos = dst_pos;
let dst_ptr = self.get_write_ptr();
self.write_pos = src_pos;
// Generate jmp_ptr from src_pos to dst_pos
self.without_page_end_reserve(|cb| {
cb.add_comment("jump to next page");
jmp_ptr(cb, dst_ptr);
assert!(!cb.has_dropped_bytes());
});
// Start the next code from dst_pos
self.write_pos = dst_pos;
}
!self.dropped_bytes
}
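To make the arithmetic above concrete, here is a worked example with assumed values (not code from the commit): with 16 KiB pages and an inline block in a release build, a write position inside page 1 yields page index 2 for the jump target, and set_page only ever moves forward.

```rust
fn main() {
    // Assumed values: 16 KiB pages, inline block, page_start() == 0 (release build).
    let page_size = 16 * 1024;
    let page_start = 0;
    let write_pos = 20_000; // currently somewhere inside page index 1

    // next_page picks the page after the one containing write_pos...
    let next_page_idx = write_pos / page_size + 1;
    // ...and set_page converts that index back into a byte position.
    let dst_pos = page_size * next_page_idx + page_start;

    assert_eq!(next_page_idx, 2);
    assert_eq!(dst_pos, 32 * 1024);

    // set_page requires write_pos < dst_pos, which is what keeps a CodeBlock that
    // temporarily rewound for patching from overwriting code on an earlier page.
    assert!(write_pos < dst_pos);
}
```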
/// write_pos of the current page start
pub fn page_start_pos(&self) -> usize {
self.get_write_pos() / self.page_size * self.page_size + self.page_start()
}
/// Offset of each page where CodeBlock should start writing
pub fn page_start(&self) -> usize {
let mut start = if self.inline() {
0
} else {
self.page_size / 2
};
if cfg!(debug_assertions) && !cfg!(test) {
// Leave illegal instructions at the beginning of each page to assert
// we're not accidentally crossing page boundaries.
start += JMP_PTR_BYTES;
}
start
}
/// Offset of each page where CodeBlock should stop writing (exclusive)
pub fn page_end(&self) -> usize {
let page_end = if self.inline() {
self.page_size / 2
} else {
self.page_size
};
page_end - self.page_end_reserve // reserve space to jump to the next page
}
/// Call a given function with page_end_reserve = 0
pub fn without_page_end_reserve<F: Fn(&mut Self)>(&mut self, block: F) {
let old_page_end_reserve = self.page_end_reserve;
self.page_end_reserve = 0;
block(self);
self.page_end_reserve = old_page_end_reserve;
}
/// Return the address ranges, within a given address range, that this CodeBlock can write to.
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
let mut addrs = vec![];
let mut start = start_ptr.raw_ptr() as usize;
let codeblock_end = self.get_ptr(self.get_mem_size()).raw_ptr() as usize;
let end = std::cmp::min(end_ptr.raw_ptr() as usize, codeblock_end);
while start < end {
let current_page = start / self.page_size * self.page_size;
let page_end = std::cmp::min(end, current_page + self.page_end()) as usize;
addrs.push((start, page_end));
start = current_page + self.page_size + self.page_start();
}
addrs
}
/// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
self.write_pos + num_bytes < self.mem_size
let page_offset = self.write_pos % self.page_size;
let capacity = self.page_end().saturating_sub(page_offset);
num_bytes <= capacity
}
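For concreteness, a small worked example of the new per-page bookkeeping, assuming the 16 KiB default page size and the x86_64 JMP_PTR_BYTES of 6 defined later in this diff (the debug-build padding from page_start is ignored). It shows the inline/outlined bounds within one page and how has_capacity is now measured against the end of the current half-page rather than the whole region:

```rust
fn main() {
    // Assumed values: page_size = 16 KiB, JMP_PTR_BYTES = 6 (x86_64), release build.
    let page_size = 16 * 1024;
    let page_end_reserve = 6; // space kept for the jump to the next page

    // Inline half writes to [0, 8186); outlined half writes to [8192, 16378).
    let inline_end = page_size / 2 - page_end_reserve;
    let outlined_start = page_size / 2;
    let outlined_end = page_size - page_end_reserve;
    assert_eq!((inline_end, outlined_start, outlined_end), (8186, 8192, 16378));

    // has_capacity is now relative to the current page: at write_pos = 8180 the
    // inline block only has 6 bytes left before its half-page ends.
    let write_pos = 8180;
    let page_offset = write_pos % page_size;
    let capacity = inline_end.saturating_sub(page_offset);
    assert_eq!(capacity, 6);
    assert!(5 <= capacity);  // a 5-byte instruction still fits here
    assert!(capacity < 10);  // a 10-byte one sets dropped_bytes and triggers next_page
}
```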
/// Add an assembly comment if the feature is on.
@ -121,8 +269,8 @@ impl CodeBlock {
self.write_pos
}
pub fn get_mem(&mut self) -> &mut VirtualMem {
&mut self.mem_block
pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
self.mem_block.borrow_mut().write_byte(write_ptr, byte)
}
// Set the current write position
@ -134,49 +282,31 @@ impl CodeBlock {
self.write_pos = pos;
}
// Align the current write pointer to a multiple of bytes
pub fn align_pos(&mut self, multiple: u32) {
// Compute the alignment boundary that is lower or equal
// Do everything with usize
let multiple: usize = multiple.try_into().unwrap();
let pos = self.get_write_ptr().raw_ptr() as usize;
let remainder = pos % multiple;
let prev_aligned = pos - remainder;
if prev_aligned == pos {
// Already aligned so do nothing
} else {
// Align by advancing
let pad = multiple - remainder;
self.set_pos(self.get_write_pos() + pad);
}
}
// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize();
let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize();
self.set_pos(pos);
}
/// Get a (possibly dangling) direct pointer into the executable memory block
pub fn get_ptr(&self, offset: usize) -> CodePtr {
self.mem_block.start_ptr().add_bytes(offset)
self.mem_block.borrow().start_ptr().add_bytes(offset)
}
/// Get a (possibly dangling) direct pointer to the current write position
pub fn get_write_ptr(&mut self) -> CodePtr {
pub fn get_write_ptr(&self) -> CodePtr {
self.get_ptr(self.write_pos)
}
/// Write a single byte at the current position.
pub fn write_byte(&mut self, byte: u8) {
let write_ptr = self.get_write_ptr();
if self.mem_block.write_byte(write_ptr, byte).is_ok() {
self.write_pos += 1;
} else {
if !self.has_capacity(1) || self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_err() {
self.dropped_bytes = true;
}
// Always advance write_pos since arm64 PadInvalPatch needs this to stop the loop.
self.write_pos += 1;
}
/// Write multiple bytes starting from the current position.
@ -242,6 +372,9 @@ impl CodeBlock {
self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode });
// Move past however many bytes the instruction takes up
if !self.has_capacity(num_bytes) {
self.dropped_bytes = true; // retry emitting the Insn after next_page
}
self.write_pos += num_bytes;
}
@ -274,14 +407,43 @@ impl CodeBlock {
assert!(self.label_refs.is_empty());
}
pub fn mark_all_executable(&mut self) {
self.mem_block.mark_all_executable();
pub fn clear_labels(&mut self) {
self.label_addrs.clear();
self.label_names.clear();
self.label_refs.clear();
}
pub fn get_label_state(&self) -> LabelState {
LabelState {
label_addrs: self.label_addrs.clone(),
label_names: self.label_names.clone(),
label_refs: self.label_refs.clone(),
}
}
pub fn set_label_state(&mut self, state: LabelState) {
self.label_addrs = state.label_addrs;
self.label_names = state.label_names;
self.label_refs = state.label_refs;
}
pub fn mark_all_executable(&mut self) {
self.mem_block.borrow_mut().mark_all_executable();
}
#[cfg(feature = "disasm")]
pub fn inline(&self) -> bool {
!self.outlined
}
pub fn other_cb(&self) -> Option<&'static mut Self> {
if !CodegenGlobals::has_instance() {
None
} else if self.inline() {
Some(CodegenGlobals::get_outlined_cb().unwrap())
} else {
Some(CodegenGlobals::get_inline_cb())
}
}
}
#[cfg(test)]
@ -295,7 +457,7 @@ impl CodeBlock {
let mem_start: *const u8 = alloc.mem_start();
let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size);
Self::new(virt_mem, false)
Self::new(Rc::new(RefCell::new(virt_mem)), 16 * 1024, false)
}
}
@ -303,7 +465,7 @@ impl CodeBlock {
impl fmt::LowerHex for CodeBlock {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
for pos in 0..self.write_pos {
let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() };
let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() };
fmtr.write_fmt(format_args!("{:02x}", byte))?;
}
Ok(())


@ -4,7 +4,7 @@
use crate::asm::{CodeBlock};
use crate::asm::arm64::*;
use crate::codegen::{JITState};
use crate::codegen::{JITState, CodegenGlobals};
use crate::cruby::*;
use crate::backend::ir::*;
use crate::virtualmem::CodePtr;
@ -36,6 +36,9 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
pub const C_SP_REG: A64Opnd = X31;
pub const C_SP_STEP: i32 = 16;
// The number of bytes that are generated by emit_jmp_ptr
pub const JMP_PTR_BYTES: usize = 20;
/// Map Opnd to A64Opnd
impl From<Opnd> for A64Opnd {
fn from(opnd: Opnd) -> Self {
@ -567,7 +570,7 @@ impl Assembler
/// Emit the required instructions to load the given value into the
/// given register. Our goal here is to use as few instructions as
/// possible to get this value into the register.
fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> i32 {
fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
let mut current = value;
if current <= 0xffff {
@ -680,6 +683,31 @@ impl Assembler
ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
}
fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr) {
let src_addr = cb.get_write_ptr().into_i64();
let dst_addr = dst_ptr.into_i64();
// If the offset is short enough, then we'll use the
// branch instruction. Otherwise, we'll move the
// destination into a register and use the branch
// register instruction.
let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
1
} else {
let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
br(cb, Assembler::SCRATCH0);
num_insns + 1
};
// Make sure it's always a consistent number of
// instructions in case it gets patched and has to
// use the other branch.
for _ in num_insns..(JMP_PTR_BYTES / 4) {
nop(cb);
}
}
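As a side note on the constant used above (my reading of the code, stated as an assumption): JMP_PTR_BYTES is 20 on arm64 because the long form of emit_jmp_ptr may need movz plus three movk to materialize a 64-bit address followed by br, five 4-byte instructions in total, and the short b form is padded with nop to the same size so the jump can later be patched in place.

```rust
// Sanity check of that assumption: both forms of the jump occupy 20 bytes.
fn main() {
    let insn_size = 4; // every A64 instruction is 4 bytes

    // Short form: one `b` instruction plus nop padding.
    let short_form = 1 + 4;
    // Long form: movz + 3 * movk to load the address, then `br`.
    let long_form = (1 + 3) + 1;

    assert_eq!(short_form * insn_size, 20); // == JMP_PTR_BYTES on arm64
    assert_eq!(long_form * insn_size, 20);
}
```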
// dbg!(&self.insns);
// List of GC offsets
@ -687,7 +715,13 @@ impl Assembler
// For each instruction
let start_write_pos = cb.get_write_pos();
for insn in &self.insns {
let mut insn_idx: usize = 0;
while let Some(insn) = self.insns.get(insn_idx) {
let src_ptr = cb.get_write_ptr();
let had_dropped_bytes = cb.has_dropped_bytes();
let old_label_state = cb.get_label_state();
let mut insn_gc_offsets: Vec<u32> = Vec::new();
match insn {
Insn::Comment(text) => {
if cfg!(feature = "asm_comments") {
@ -796,7 +830,7 @@ impl Assembler
cb.write_bytes(&value.as_u64().to_le_bytes());
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
gc_offsets.push(ptr_offset);
insn_gc_offsets.push(ptr_offset);
},
Opnd::None => {
unreachable!("Attempted to load from None operand");
@ -904,28 +938,7 @@ impl Assembler
Insn::Jmp(target) => {
match target {
Target::CodePtr(dst_ptr) => {
let src_addr = cb.get_write_ptr().into_i64();
let dst_addr = dst_ptr.into_i64();
// If the offset is short enough, then we'll use the
// branch instruction. Otherwise, we'll move the
// destination into a register and use the branch
// register instruction.
let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
0
} else {
let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
br(cb, Self::SCRATCH0);
num_insns
};
// Make sure it's always a consistent number of
// instructions in case it gets patched and has to
// use the other branch.
for _ in num_insns..4 {
nop(cb);
}
emit_jmp_ptr(cb, *dst_ptr);
},
Target::Label(label_idx) => {
// Here we're going to save enough space for
@ -997,13 +1010,21 @@ impl Assembler
csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
Insn::PadEntryExit => {
let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions
while (cb.get_write_pos() - start_write_pos) < jmp_len {
Insn::PadInvalPatch => {
while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES {
nop(cb);
}
}
};
// On failure, jump to the next page and retry the current insn
if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr) {
// Reset cb states before retrying the current Insn
cb.set_label_state(old_label_state);
} else {
insn_idx += 1;
gc_offsets.append(&mut insn_gc_offsets);
}
}
gc_offsets
@ -1020,21 +1041,23 @@ impl Assembler
assert!(label_idx == idx);
}
let start_write_pos = cb.get_write_pos();
let start_ptr = cb.get_write_ptr();
let gc_offsets = asm.arm64_emit(cb);
if !cb.has_dropped_bytes() {
if cb.has_dropped_bytes() {
cb.clear_labels();
} else {
cb.link_labels();
}
// Invalidate icache for newly written out region so we don't run stale code.
#[cfg(not(test))]
{
let start = cb.get_ptr(start_write_pos).raw_ptr();
let write_ptr = cb.get_write_ptr().raw_ptr();
let codeblock_end = cb.get_ptr(cb.get_mem_size()).raw_ptr();
let end = std::cmp::min(write_ptr, codeblock_end);
unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
// Invalidate icache for newly written out region so we don't run stale code.
// It should invalidate only the code ranges of the current cb because the code
// ranges of the other cb might have a memory region that is still PROT_NONE.
#[cfg(not(test))]
cb.without_page_end_reserve(|cb| {
for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
}
});
}
gc_offsets


@ -5,6 +5,7 @@
use std::cell::Cell;
use std::fmt;
use std::convert::From;
use std::io::Write;
use std::mem::take;
use crate::cruby::{VALUE};
use crate::virtualmem::{CodePtr};
@ -433,9 +434,9 @@ pub enum Insn {
// binary OR operation.
Or { left: Opnd, right: Opnd, out: Opnd },
/// Pad nop instructions to accommodate Op::Jmp in case the block is
/// invalidated.
PadEntryExit,
/// Pad nop instructions to accommodate Op::Jmp in case the block or the insn
/// is invalidated.
PadInvalPatch,
// Mark a position in the generated code
PosMarker(PosMarkerFn),
@ -521,7 +522,7 @@ impl Insn {
Insn::Mov { .. } => "Mov",
Insn::Not { .. } => "Not",
Insn::Or { .. } => "Or",
Insn::PadEntryExit => "PadEntryExit",
Insn::PadInvalPatch => "PadInvalPatch",
Insn::PosMarker(_) => "PosMarker",
Insn::RShift { .. } => "RShift",
Insn::Store { .. } => "Store",
@ -658,7 +659,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::Jz(_) |
Insn::Label(_) |
Insn::LeaLabel { .. } |
Insn::PadEntryExit |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
@ -755,7 +756,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::Jz(_) |
Insn::Label(_) |
Insn::LeaLabel { .. } |
Insn::PadEntryExit |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
@ -1474,8 +1475,8 @@ impl Assembler {
out
}
pub fn pad_entry_exit(&mut self) {
self.push_insn(Insn::PadEntryExit);
pub fn pad_inval_patch(&mut self) {
self.push_insn(Insn::PadInvalPatch);
}
//pub fn pos_marker<F: FnMut(CodePtr)>(&mut self, marker_fn: F)


@ -231,7 +231,7 @@ fn test_jcc_ptr()
{
let (mut asm, mut cb) = setup_asm();
let side_exit = Target::CodePtr((5 as *mut u8).into());
let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
asm.test(
Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
@ -248,7 +248,7 @@ fn test_jmp_ptr()
{
let (mut asm, mut cb) = setup_asm();
let stub = Target::CodePtr((5 as *mut u8).into());
let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
asm.jmp(stub);
asm.compile_with_num_regs(&mut cb, 0);
@ -259,7 +259,7 @@ fn test_jo()
{
let (mut asm, mut cb) = setup_asm();
let side_exit = Target::CodePtr((5 as *mut u8).into());
let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
let arg1 = Opnd::mem(64, SP, 0);
let arg0 = Opnd::mem(64, SP, 8);


@ -9,6 +9,7 @@ use crate::asm::x86_64::*;
use crate::codegen::{JITState};
use crate::cruby::*;
use crate::backend::ir::*;
use crate::codegen::CodegenGlobals;
// Use the x86 register type for this platform
pub type Reg = X86Reg;
@ -32,6 +33,9 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
pub const C_RET_REG: Reg = RAX_REG;
pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
// The number of bytes that are generated by jmp_ptr
pub const JMP_PTR_BYTES: usize = 6;
/// Map Opnd to X86Opnd
impl From<Opnd> for X86Opnd {
fn from(opnd: Opnd) -> Self {
@ -375,7 +379,13 @@ impl Assembler
// For each instruction
let start_write_pos = cb.get_write_pos();
for insn in &self.insns {
let mut insns_idx: usize = 0;
while let Some(insn) = self.insns.get(insns_idx) {
let src_ptr = cb.get_write_ptr();
let had_dropped_bytes = cb.has_dropped_bytes();
let old_label_state = cb.get_label_state();
let mut insn_gc_offsets: Vec<u32> = Vec::new();
match insn {
Insn::Comment(text) => {
if cfg!(feature = "asm_comments") {
@ -461,7 +471,7 @@ impl Assembler
if !val.special_const_p() {
// The pointer immediate is encoded as the last part of the mov written out
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
gc_offsets.push(ptr_offset);
insn_gc_offsets.push(ptr_offset);
}
}
},
@ -651,11 +661,10 @@ impl Assembler
emit_csel(cb, *truthy, *falsy, *out, cmovl);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
Insn::PadEntryExit => {
// We assume that our Op::Jmp usage that gets invalidated is <= 5
let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap();
if code_size < 5 {
nop(cb, 5 - code_size);
Insn::PadInvalPatch => {
let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
if code_size < JMP_PTR_BYTES {
nop(cb, (JMP_PTR_BYTES - code_size) as u32);
}
}
@ -666,6 +675,15 @@ impl Assembler
#[allow(unreachable_patterns)]
_ => panic!("unsupported instruction passed to x86 backend: {:?}", insn)
};
// On failure, jump to the next page and retry the current insn
if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) {
// Reset cb states before retrying the current Insn
cb.set_label_state(old_label_state);
} else {
insns_idx += 1;
gc_offsets.append(&mut insn_gc_offsets);
}
}
gc_offsets
@ -684,7 +702,9 @@ impl Assembler
let gc_offsets = asm.x86_emit(cb);
if !cb.has_dropped_bytes() {
if cb.has_dropped_bytes() {
cb.clear_labels();
} else {
cb.link_labels();
}


@ -13,13 +13,15 @@ use crate::utils::*;
use CodegenStatus::*;
use InsnOpnd::*;
use std::cell::RefCell;
use std::cell::RefMut;
use std::cmp;
use std::collections::HashMap;
use std::ffi::CStr;
use std::mem::{self, size_of};
use std::os::raw::c_uint;
use std::ptr;
use std::rc::Rc;
use std::slice;
pub use crate::virtualmem::CodePtr;
@ -296,6 +298,7 @@ fn jit_prepare_routine_call(
/// Record the current codeblock write position for rewriting into a jump into
/// the outlined block later. Used to implement global code invalidation.
fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) {
asm.pad_inval_patch();
asm.pos_marker(move |code_ptr| {
CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos);
});
@ -606,19 +609,6 @@ fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) {
/// Compile an interpreter entry block to be inserted into an iseq
/// Returns None if compilation fails.
pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> {
const MAX_PROLOGUE_SIZE: usize = 1024;
// Check if we have enough executable memory
if !cb.has_capacity(MAX_PROLOGUE_SIZE) {
return None;
}
let old_write_pos = cb.get_write_pos();
// TODO: figure out if this is actually beneficial for performance
// Align the current write position to cache line boundaries
cb.align_pos(64);
let code_ptr = cb.get_write_ptr();
let mut asm = Assembler::new();
@ -660,10 +650,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O
asm.compile(cb);
// Verify MAX_PROLOGUE_SIZE
assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE);
return Some(code_ptr);
if cb.has_dropped_bytes() {
None
} else {
Some(code_ptr)
}
}
// Generate code to check for interrupts and take a side-exit.
@ -853,7 +844,7 @@ pub fn gen_single_block(
{
let mut block = jit.block.borrow_mut();
if block.entry_exit.is_some() {
asm.pad_entry_exit();
asm.pad_inval_patch();
}
// Compile code into the code block
@ -6544,29 +6535,13 @@ static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None;
impl CodegenGlobals {
/// Initialize the codegen globals
pub fn init() {
// Executable memory size in MiB
let mem_size = get_option!(exec_mem_size) * 1024 * 1024;
// Executable memory and code page size in bytes
let mem_size = get_option!(exec_mem_size);
let code_page_size = get_option!(code_page_size);
#[cfg(not(test))]
let (mut cb, mut ocb) = {
// TODO(alan): we can error more gracefully when the user gives
// --yjit-exec-mem=absurdly-large-number
//
// 2 GiB. It's likely a bug if we generate this much code.
const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024;
assert!(mem_size <= MAX_BUFFER_SIZE);
let mem_size_u32 = mem_size as u32;
let half_size = mem_size / 2;
let page_size = unsafe { rb_yjit_get_page_size() };
let assert_page_aligned = |ptr| assert_eq!(
0,
ptr as usize % page_size.as_usize(),
"Start of virtual address block should be page-aligned",
);
let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) };
let second_half = virt_block.wrapping_add(half_size);
let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
// `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
@ -6575,26 +6550,25 @@ impl CodegenGlobals {
//
// Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
// (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
assert_page_aligned(virt_block);
assert_page_aligned(second_half);
let page_size = unsafe { rb_yjit_get_page_size() };
assert_eq!(
virt_block as usize % page_size.as_usize(), 0,
"Start of virtual address block should be page-aligned",
);
assert_eq!(code_page_size % page_size.as_usize(), 0, "code_page_size was not page-aligned");
use crate::virtualmem::*;
let first_half = VirtualMem::new(
let mem_block = VirtualMem::new(
SystemAllocator {},
page_size,
virt_block,
half_size
);
let second_half = VirtualMem::new(
SystemAllocator {},
page_size,
second_half,
half_size
mem_size,
);
let mem_block = Rc::new(RefCell::new(mem_block));
let cb = CodeBlock::new(first_half, false);
let ocb = OutlinedCb::wrap(CodeBlock::new(second_half, true));
let cb = CodeBlock::new(mem_block.clone(), code_page_size, false);
let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, code_page_size, true));
(cb, ocb)
};
@ -6702,6 +6676,10 @@ impl CodegenGlobals {
unsafe { CODEGEN_GLOBALS.as_mut().unwrap() }
}
pub fn has_instance() -> bool {
unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
}
/// Get a mutable reference to the inline code block
pub fn get_inline_cb() -> &'static mut CodeBlock {
&mut CodegenGlobals::get_instance().inline_cb


@ -665,7 +665,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
if new_addr != object {
for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
cb.get_mem().write_byte(byte_code_ptr, byte)
cb.write_mem(byte_code_ptr, byte)
.expect("patching existing code should be within bounds");
}
}
@ -1916,7 +1916,9 @@ pub fn gen_branch(
// Call the branch generation function
asm.mark_branch_start(&branchref);
gen_fn(asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], BranchShape::Default);
if let Some(dst_addr) = branch.dst_addrs[0] {
gen_fn(asm, dst_addr, branch.dst_addrs[1], BranchShape::Default);
}
asm.mark_branch_end(&branchref);
}
@ -1955,6 +1957,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu
branch.shape = BranchShape::Default;
// Call the branch generation function
asm.comment("gen_direct_jmp: existing block");
asm.mark_branch_start(&branchref);
gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
asm.mark_branch_end(&branchref);
@ -1965,6 +1968,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu
branch.shape = BranchShape::Next0;
// The branch is effectively empty (a noop)
asm.comment("gen_direct_jmp: fallthrough");
asm.mark_branch_start(&branchref);
asm.mark_branch_end(&branchref);
}
@ -2003,7 +2007,9 @@ pub fn defer_compilation(
// Call the branch generation function
asm.mark_branch_start(&branch_rc);
gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
if let Some(dst_addr) = branch.dst_addrs[0] {
gen_jump_branch(asm, dst_addr, None, BranchShape::Default);
}
asm.mark_branch_end(&branch_rc);
}


@ -4,9 +4,14 @@ use std::ffi::CStr;
#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct Options {
// Size of the executable memory block to allocate in MiB
// Size of the executable memory block to allocate in bytes
// Note that the command line argument is expressed in MiB and not bytes
pub exec_mem_size: usize,
// Size of each executable memory code page in bytes
// Note that the command line argument is expressed in KiB and not bytes
pub code_page_size: usize,
// Number of method calls after which to start generating code
// Threshold==1 means compile on first execution
pub call_threshold: usize,
@ -48,7 +53,8 @@ pub struct Options {
// Initialize the options to default values
pub static mut OPTIONS: Options = Options {
exec_mem_size: 256,
exec_mem_size: 256 * 1024 * 1024,
code_page_size: 16 * 1024,
call_threshold: 10,
greedy_versioning: false,
no_type_prop: false,
@ -118,8 +124,30 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
match (opt_name, opt_val) {
("", "") => (), // Simply --yjit
("exec-mem-size", _) => match opt_val.parse() {
Ok(n) => unsafe { OPTIONS.exec_mem_size = n },
("exec-mem-size", _) => match opt_val.parse::<usize>() {
Ok(n) => {
if n == 0 || n > 2 * 1024 * 1024 {
return None
}
// Convert from MiB to bytes internally for convenience
unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 }
}
Err(_) => {
return None;
}
},
("code-page-size", _) => match opt_val.parse::<usize>() {
Ok(n) => {
// Enforce bounds checks and that n is divisible by 4KiB
if n < 4 || n > 256 || n % 4 != 0 {
return None
}
// Convert from KiB to bytes internally for convenience
unsafe { OPTIONS.code_page_size = n * 1024 }
}
Err(_) => {
return None;
}
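The diff does not show the flag spelling itself; assuming the usual --yjit-<option> mapping, this would presumably surface as --yjit-code-page-size, given in KiB. The accepted values follow the bounds enforced above; here is a small standalone restatement of that check:

```rust
// Restatement of the bounds from parse_option above (values taken from this diff):
// code-page-size is given in KiB, must be in 4..=256, divisible by 4, and is
// stored internally in bytes.
fn code_page_size_bytes(kib: usize) -> Option<usize> {
    if kib < 4 || kib > 256 || kib % 4 != 0 {
        return None; // rejected, same as returning None from parse_option
    }
    Some(kib * 1024)
}

fn main() {
    assert_eq!(code_page_size_bytes(16), Some(16 * 1024)); // the default
    assert_eq!(code_page_size_bytes(6), None);   // not a multiple of 4 KiB
    assert_eq!(code_page_size_bytes(512), None); // above the 256 KiB cap
}
```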


@ -74,14 +74,13 @@ pub(crate) use offset_of;
// This should work fine on ASCII strings and anything else
// that is considered legal UTF-8, including embedded nulls.
fn ruby_str_to_rust(v: VALUE) -> String {
// Make sure the CRuby encoding is UTF-8 compatible
let encoding = unsafe { rb_ENCODING_GET(v) } as u32;
assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII);
let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation
match String::from_utf8(str_slice.to_vec()) {
Ok(utf8) => utf8,
Err(_) => String::new(),
}
}
// Location is the file defining the method, colon, method name.