YJIT: Interleave inline and outlined code blocks (#6460)
Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>

Parent: e7c71c6c92
Commit: 64c52c4282

Notes (git, 2022-10-17 17:46:19 +00:00):
Merged-By: k0kubun <takashikkbn@gmail.com>
@@ -1,9 +1,20 @@
use std::cell::RefCell;
use std::cmp;
use std::fmt;
use std::mem;
use std::rc::Rc;
#[cfg(target_arch = "x86_64")]
use crate::backend::x86_64::JMP_PTR_BYTES;
#[cfg(target_arch = "aarch64")]
use crate::backend::arm64::JMP_PTR_BYTES;
use crate::backend::ir::Assembler;
use crate::backend::ir::Target;
use crate::virtualmem::WriteError;

#[cfg(feature = "asm_comments")]
use std::collections::BTreeMap;

use crate::codegen::CodegenGlobals;
use crate::virtualmem::{VirtualMem, CodePtr};

// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
@@ -17,7 +28,8 @@ pub mod arm64;
//

/// Reference to an ASM label
struct LabelRef {
#[derive(Clone)]
pub struct LabelRef {
// Position in the code block where the label reference exists
pos: usize,

@@ -36,7 +48,7 @@ struct LabelRef {
/// Block of memory into which instructions can be assembled
pub struct CodeBlock {
// Memory for storing the encoded instructions
mem_block: VirtualMem,
mem_block: Rc<RefCell<VirtualMem>>,

// Memory block size
mem_size: usize,
@@ -44,6 +56,12 @@ pub struct CodeBlock {
// Current writing position
write_pos: usize,

// Size of a code page (inlined + outlined)
page_size: usize,

// Size reserved for writing a jump to the next page
page_end_reserve: usize,

// Table of registered label addresses
label_addrs: Vec<usize>,

@@ -58,7 +76,6 @@ pub struct CodeBlock {
asm_comments: BTreeMap<usize, Vec<String>>,

// True for OutlinedCb
#[cfg(feature = "disasm")]
pub outlined: bool,

// Set if the CodeBlock is unable to output some instructions,
@@ -67,27 +84,158 @@ pub struct CodeBlock {
dropped_bytes: bool,
}

/// Set of CodeBlock label states. Used for recovering the previous state.
pub struct LabelState {
label_addrs: Vec<usize>,
label_names: Vec<String>,
label_refs: Vec<LabelRef>,
}

impl CodeBlock {
/// Make a new CodeBlock
pub fn new(mem_block: VirtualMem, outlined: bool) -> Self {
Self {
mem_size: mem_block.virtual_region_size(),
pub fn new(mem_block: Rc<RefCell<VirtualMem>>, page_size: usize, outlined: bool) -> Self {
let mem_size = mem_block.borrow().virtual_region_size();
let mut cb = Self {
mem_block,
mem_size,
write_pos: 0,
page_size,
page_end_reserve: JMP_PTR_BYTES,
label_addrs: Vec::new(),
label_names: Vec::new(),
label_refs: Vec::new(),
#[cfg(feature = "asm_comments")]
asm_comments: BTreeMap::new(),
#[cfg(feature = "disasm")]
outlined,
dropped_bytes: false,
};
cb.write_pos = cb.page_start();
cb
}

/// Move the CodeBlock to the next page. If it's on the furthest page,
/// move the other CodeBlock to the next page as well.
pub fn next_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, base_ptr: CodePtr, jmp_ptr: F) -> bool {
let old_write_ptr = self.get_write_ptr();
self.set_write_ptr(base_ptr);
self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));

// Move self to the next page
let next_page_idx = self.write_pos / self.page_size + 1;
if !self.set_page(next_page_idx, &jmp_ptr) {
self.set_write_ptr(old_write_ptr); // rollback if there are no more pages
return false;
}

// Move the other CodeBlock to the same page if it's on the furthest page
self.other_cb().unwrap().set_page(next_page_idx, &jmp_ptr);

return !self.dropped_bytes;
}
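
For orientation only (not part of the commit): with this change the inline and outlined CodeBlocks share one memory region split into pages, and a block that fills its half of a page hops to the same half of the next page. A minimal standalone sketch of the next_page() index math, assuming the 16 KiB default page size; the helper name next_page_idx is invented for the demo:

// Sketch of the next_page() bookkeeping: both code blocks share one region
// carved into pages, and a block that fills its half of a page hops to the
// same half of the following page. Page size is an assumed 16 KiB.
fn next_page_idx(write_pos: usize, page_size: usize) -> usize {
    write_pos / page_size + 1
}

fn main() {
    let page_size = 16 * 1024;
    // Inline code at byte 8_000 of page 0 would continue on page 1.
    assert_eq!(next_page_idx(8_000, page_size), 1);
    // Outlined code near the end of page 3 also moves to page 4.
    assert_eq!(next_page_idx(3 * page_size + 16_000, page_size), 4);
    // set_page() below refuses to move if that would go backwards or past mem_size.
}
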
/// Move the CodeBlock to page_idx only if it's not going backwards.
fn set_page<F: Fn(&mut CodeBlock, CodePtr)>(&mut self, page_idx: usize, jmp_ptr: &F) -> bool {
// Do not move the CodeBlock if page_idx points to an old position so that this
// CodeBlock will not overwrite existing code.
//
// Let's say this is the current situation:
//   cb: [page1, page2, page3 (write_pos)], ocb: [page1, page2, page3 (write_pos)]
//
// When cb needs to patch page1, this will be temporarily changed to:
//   cb: [page1 (write_pos), page2, page3], ocb: [page1, page2, page3 (write_pos)]
//
// While patching page1, cb may need to jump to page2. What set_page currently does is:
//   cb: [page1, page2 (write_pos), page3], ocb: [page1, page2, page3 (write_pos)]
// instead of:
//   cb: [page1, page2 (write_pos), page3], ocb: [page1, page2 (write_pos), page3]
// because moving ocb's write_pos from page3 to the beginning of page2 will let ocb's
// write_pos point to existing code in page2, which might let ocb overwrite it later.
//
// We could remember the last write_pos in page2 and let set_page use that position,
// but that would waste some space to keep a write_pos for every single page.
// It doesn't seem necessary for performance either. So we're currently not doing it.
let mut dst_pos = self.page_size * page_idx + self.page_start();
if self.page_size * page_idx < self.mem_size && self.write_pos < dst_pos {
// Reset dropped_bytes
self.dropped_bytes = false;

// Convert dst_pos to dst_ptr
let src_pos = self.write_pos;
self.write_pos = dst_pos;
let dst_ptr = self.get_write_ptr();
self.write_pos = src_pos;

// Generate jmp_ptr from src_pos to dst_pos
self.without_page_end_reserve(|cb| {
cb.add_comment("jump to next page");
jmp_ptr(cb, dst_ptr);
assert!(!cb.has_dropped_bytes());
});

// Start the next code from dst_pos
self.write_pos = dst_pos;
}
!self.dropped_bytes
}

/// write_pos of the current page start
pub fn page_start_pos(&self) -> usize {
self.get_write_pos() / self.page_size * self.page_size + self.page_start()
}

/// Offset of each page where CodeBlock should start writing
pub fn page_start(&self) -> usize {
let mut start = if self.inline() {
0
} else {
self.page_size / 2
};
if cfg!(debug_assertions) && !cfg!(test) {
// Leave illegal instructions at the beginning of each page to assert
// we're not accidentally crossing page boundaries.
start += JMP_PTR_BYTES;
}
start
}

/// Offset of each page where CodeBlock should stop writing (exclusive)
pub fn page_end(&self) -> usize {
let page_end = if self.inline() {
self.page_size / 2
} else {
self.page_size
};
page_end - self.page_end_reserve // reserve space to jump to the next page
}
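
A quick standalone check of the page_start()/page_end() arithmetic above, assuming 16 KiB pages, a 20-byte jump reserve, and a release build (no debug padding); the free functions here are illustrative stand-ins for the methods:

// Throwaway check of the page_start()/page_end() arithmetic, assuming
// page_size = 16 KiB, page_end_reserve = JMP_PTR_BYTES = 20, release build.
fn page_start(inline: bool, page_size: usize) -> usize {
    if inline { 0 } else { page_size / 2 }
}

fn page_end(inline: bool, page_size: usize, reserve: usize) -> usize {
    (if inline { page_size / 2 } else { page_size }) - reserve
}

fn main() {
    let (page_size, reserve) = (16 * 1024, 20);
    assert_eq!(page_start(true, page_size), 0);              // inline half starts at the page base
    assert_eq!(page_end(true, page_size, reserve), 8172);    // 8192 - 20
    assert_eq!(page_start(false, page_size), 8192);          // outlined half starts mid-page
    assert_eq!(page_end(false, page_size, reserve), 16364);  // 16384 - 20
}
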
/// Call a given function with page_end_reserve = 0
pub fn without_page_end_reserve<F: Fn(&mut Self)>(&mut self, block: F) {
let old_page_end_reserve = self.page_end_reserve;
self.page_end_reserve = 0;
block(self);
self.page_end_reserve = old_page_end_reserve;
}

/// Return the address ranges of a given address range that this CodeBlock can write.
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
let mut addrs = vec![];
let mut start = start_ptr.raw_ptr() as usize;
let codeblock_end = self.get_ptr(self.get_mem_size()).raw_ptr() as usize;
let end = std::cmp::min(end_ptr.raw_ptr() as usize, codeblock_end);
while start < end {
let current_page = start / self.page_size * self.page_size;
let page_end = std::cmp::min(end, current_page + self.page_end()) as usize;
addrs.push((start, page_end));
start = current_page + self.page_size + self.page_start();
}
addrs
}

/// Check if this code block has sufficient remaining capacity
pub fn has_capacity(&self, num_bytes: usize) -> bool {
self.write_pos + num_bytes < self.mem_size
let page_offset = self.write_pos % self.page_size;
let capacity = self.page_end().saturating_sub(page_offset);
num_bytes <= capacity
}

/// Add an assembly comment if the feature is on.
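
Illustrative note: has_capacity() now measures the room left in the current page rather than in the whole memory block. A minimal sketch with assumed values (16 KiB page, inline half ending at 8172):

// Sketch of the new per-page has_capacity() logic; page_end assumes the
// inline half of a 16 KiB page minus a 20-byte reserve.
fn has_capacity(write_pos: usize, num_bytes: usize, page_size: usize, page_end: usize) -> bool {
    let page_offset = write_pos % page_size;
    let capacity = page_end.saturating_sub(page_offset);
    num_bytes <= capacity
}

fn main() {
    let (page_size, page_end) = (16 * 1024, 8 * 1024 - 20);
    assert!(has_capacity(100, 1024, page_size, page_end));          // plenty of room early in the page
    assert!(!has_capacity(8 * 1024 - 30, 64, page_size, page_end)); // too close to the reserved tail
}
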
@@ -121,8 +269,8 @@ impl CodeBlock {
self.write_pos
}

pub fn get_mem(&mut self) -> &mut VirtualMem {
&mut self.mem_block
pub fn write_mem(&self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
self.mem_block.borrow_mut().write_byte(write_ptr, byte)
}

// Set the current write position
@@ -134,49 +282,31 @@ impl CodeBlock {
self.write_pos = pos;
}

// Align the current write pointer to a multiple of bytes
pub fn align_pos(&mut self, multiple: u32) {
// Compute the alignment boundary that is lower or equal
// Do everything with usize
let multiple: usize = multiple.try_into().unwrap();
let pos = self.get_write_ptr().raw_ptr() as usize;
let remainder = pos % multiple;
let prev_aligned = pos - remainder;

if prev_aligned == pos {
// Already aligned so do nothing
} else {
// Align by advancing
let pad = multiple - remainder;
self.set_pos(self.get_write_pos() + pad);
}
}

// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize();
let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize();
self.set_pos(pos);
}

/// Get a (possibly dangling) direct pointer into the executable memory block
pub fn get_ptr(&self, offset: usize) -> CodePtr {
self.mem_block.start_ptr().add_bytes(offset)
self.mem_block.borrow().start_ptr().add_bytes(offset)
}

/// Get a (possibly dangling) direct pointer to the current write position
pub fn get_write_ptr(&mut self) -> CodePtr {
pub fn get_write_ptr(&self) -> CodePtr {
self.get_ptr(self.write_pos)
}

/// Write a single byte at the current position.
pub fn write_byte(&mut self, byte: u8) {
let write_ptr = self.get_write_ptr();

if self.mem_block.write_byte(write_ptr, byte).is_ok() {
self.write_pos += 1;
} else {
if !self.has_capacity(1) || self.mem_block.borrow_mut().write_byte(write_ptr, byte).is_err() {
self.dropped_bytes = true;
}

// Always advance write_pos since arm64 PadEntryExit needs this to stop the loop.
self.write_pos += 1;
}

/// Write multiple bytes starting from the current position.
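
Illustrative sketch of the write_byte() failure handling above: when a write does not fit, the byte is dropped and dropped_bytes is set, but write_pos still advances so emission loops terminate and the caller can retry on the next page. TinyWriter and its 32-byte page are invented for the demo:

// Demo of "drop the byte but still advance the position" so a caller can
// detect the overflow and retry the whole instruction on the next page.
struct TinyWriter { buf: Vec<u8>, write_pos: usize, dropped_bytes: bool, page_end: usize }

impl TinyWriter {
    fn write_byte(&mut self, byte: u8) {
        if self.write_pos < self.page_end {
            self.buf[self.write_pos] = byte;
        } else {
            self.dropped_bytes = true; // signal the caller to move to the next page
        }
        self.write_pos += 1; // always advance, even on failure
    }
}

fn main() {
    let mut w = TinyWriter { buf: vec![0; 64], write_pos: 30, dropped_bytes: false, page_end: 32 };
    for b in 0..4u8 { w.write_byte(b); }
    assert!(w.dropped_bytes);    // the last two bytes did not fit in the 32-byte page
    assert_eq!(w.write_pos, 34); // but the position still moved past them
}
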
@@ -242,6 +372,9 @@ impl CodeBlock {
self.label_refs.push(LabelRef { pos: self.write_pos, label_idx, num_bytes, encode });

// Move past however many bytes the instruction takes up
if !self.has_capacity(num_bytes) {
self.dropped_bytes = true; // retry emitting the Insn after next_page
}
self.write_pos += num_bytes;
}

@@ -274,14 +407,43 @@ impl CodeBlock {
assert!(self.label_refs.is_empty());
}

pub fn mark_all_executable(&mut self) {
self.mem_block.mark_all_executable();
pub fn clear_labels(&mut self) {
self.label_addrs.clear();
self.label_names.clear();
self.label_refs.clear();
}

pub fn get_label_state(&self) -> LabelState {
LabelState {
label_addrs: self.label_addrs.clone(),
label_names: self.label_names.clone(),
label_refs: self.label_refs.clone(),
}
}

pub fn set_label_state(&mut self, state: LabelState) {
self.label_addrs = state.label_addrs;
self.label_names = state.label_names;
self.label_refs = state.label_refs;
}

pub fn mark_all_executable(&mut self) {
self.mem_block.borrow_mut().mark_all_executable();
}

#[cfg(feature = "disasm")]
pub fn inline(&self) -> bool {
!self.outlined
}

pub fn other_cb(&self) -> Option<&'static mut Self> {
if !CodegenGlobals::has_instance() {
None
} else if self.inline() {
Some(CodegenGlobals::get_outlined_cb().unwrap())
} else {
Some(CodegenGlobals::get_inline_cb())
}
}
}

#[cfg(test)]
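
Illustrative sketch of how the new LabelState snapshot is meant to be used: clone the label vectors before emitting an instruction, and restore them if that instruction has to be re-emitted on a fresh page. Types are simplified for the demo:

// Snapshot/restore of label bookkeeping around a retried instruction;
// label references are reduced to plain positions for the demo.
#[derive(Clone)]
struct LabelState { label_addrs: Vec<usize>, label_names: Vec<String>, label_refs: Vec<usize> }

fn main() {
    let mut state = LabelState { label_addrs: vec![0x10], label_names: vec!["entry".into()], label_refs: vec![] };
    let saved = state.clone();   // get_label_state()
    state.label_refs.push(0x40); // emitting an insn records a label reference...
    state = saved;               // ...set_label_state() discards it before the retry
    assert!(state.label_refs.is_empty());
}
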
@@ -295,7 +457,7 @@ impl CodeBlock {
let mem_start: *const u8 = alloc.mem_start();
let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size);

Self::new(virt_mem, false)
Self::new(Rc::new(RefCell::new(virt_mem)), 16 * 1024, false)
}
}

@@ -303,7 +465,7 @@
impl fmt::LowerHex for CodeBlock {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
for pos in 0..self.write_pos {
let byte = unsafe { self.mem_block.start_ptr().raw_ptr().add(pos).read() };
let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() };
fmtr.write_fmt(format_args!("{:02x}", byte))?;
}
Ok(())
@@ -4,7 +4,7 @@

use crate::asm::{CodeBlock};
use crate::asm::arm64::*;
use crate::codegen::{JITState};
use crate::codegen::{JITState, CodegenGlobals};
use crate::cruby::*;
use crate::backend::ir::*;
use crate::virtualmem::CodePtr;
@@ -36,6 +36,9 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
pub const C_SP_REG: A64Opnd = X31;
pub const C_SP_STEP: i32 = 16;

// The number of bytes that are generated by emit_jmp_ptr
pub const JMP_PTR_BYTES: usize = 20;

/// Map Opnd to A64Opnd
impl From<Opnd> for A64Opnd {
fn from(opnd: Opnd) -> Self {
@@ -567,7 +570,7 @@ impl Assembler
/// Emit the required instructions to load the given value into the
/// given register. Our goal here is to use as few instructions as
/// possible to get this value into the register.
fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> i32 {
fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
let mut current = value;

if current <= 0xffff {
@@ -680,6 +683,31 @@ impl Assembler
ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
}

fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr) {
let src_addr = cb.get_write_ptr().into_i64();
let dst_addr = dst_ptr.into_i64();

// If the offset is short enough, then we'll use the
// branch instruction. Otherwise, we'll move the
// destination into a register and use the branch
// register instruction.
let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
1
} else {
let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
br(cb, Assembler::SCRATCH0);
num_insns + 1
};

// Make sure it's always a consistent number of
// instructions in case it gets patched and has to
// use the other branch.
for _ in num_insns..(JMP_PTR_BYTES / 4) {
nop(cb);
}
}
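
Side note on the padding rule above, shown as a runnable sketch: JMP_PTR_BYTES is 20 bytes on arm64, i.e. five 4-byte instructions, so a short branch is followed by nops until the jump can later be patched in place with the long load-and-branch-register form. nops_needed is an invented helper:

// How many nops emit_jmp_ptr pads after the emitted instructions,
// assuming the 20-byte JMP_PTR_BYTES constant from the arm64 backend.
fn nops_needed(emitted_insns: usize) -> usize {
    const JMP_PTR_BYTES: usize = 20;
    JMP_PTR_BYTES / 4 - emitted_insns
}

fn main() {
    assert_eq!(nops_needed(1), 4); // short relative branch: b + 4 nops
    assert_eq!(nops_needed(5), 0); // mov sequence + br: already 5 instructions
}
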
// dbg!(&self.insns);

// List of GC offsets
@@ -687,7 +715,13 @@ impl Assembler

// For each instruction
let start_write_pos = cb.get_write_pos();
for insn in &self.insns {
let mut insn_idx: usize = 0;
while let Some(insn) = self.insns.get(insn_idx) {
let src_ptr = cb.get_write_ptr();
let had_dropped_bytes = cb.has_dropped_bytes();
let old_label_state = cb.get_label_state();
let mut insn_gc_offsets: Vec<u32> = Vec::new();

match insn {
Insn::Comment(text) => {
if cfg!(feature = "asm_comments") {
@@ -796,7 +830,7 @@ impl Assembler
cb.write_bytes(&value.as_u64().to_le_bytes());

let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
gc_offsets.push(ptr_offset);
insn_gc_offsets.push(ptr_offset);
},
Opnd::None => {
unreachable!("Attempted to load from None operand");
@@ -904,28 +938,7 @@ impl Assembler
Insn::Jmp(target) => {
match target {
Target::CodePtr(dst_ptr) => {
let src_addr = cb.get_write_ptr().into_i64();
let dst_addr = dst_ptr.into_i64();

// If the offset is short enough, then we'll use the
// branch instruction. Otherwise, we'll move the
// destination into a register and use the branch
// register instruction.
let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
0
} else {
let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
br(cb, Self::SCRATCH0);
num_insns
};

// Make sure it's always a consistent number of
// instructions in case it gets patched and has to
// use the other branch.
for _ in num_insns..4 {
nop(cb);
}
emit_jmp_ptr(cb, *dst_ptr);
},
Target::Label(label_idx) => {
// Here we're going to save enough space for
@@ -997,13 +1010,21 @@ impl Assembler
csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
Insn::PadEntryExit => {
let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions
while (cb.get_write_pos() - start_write_pos) < jmp_len {
Insn::PadInvalPatch => {
while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES {
nop(cb);
}
}
};

// On failure, jump to the next page and retry the current insn
if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr) {
// Reset cb states before retrying the current Insn
cb.set_label_state(old_label_state);
} else {
insn_idx += 1;
gc_offsets.append(&mut insn_gc_offsets);
}
}

gc_offsets
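
Illustrative sketch of the retry shape used in the emit loop above: an index-based loop lets the same instruction be emitted again after switching to the next page instead of silently dropping it. The one-shot page_full flag stands in for cb.has_dropped_bytes():

// Index-based emit loop that retries the current instruction once after a
// simulated "page full" condition, mirroring the while-let structure above.
fn main() {
    let insns = ["lea", "jmp"];
    let mut insn_idx = 0;
    let mut page_full = true; // pretend the first attempt at "lea" overflows the page
    let mut emitted = Vec::new();
    while let Some(insn) = insns.get(insn_idx) {
        let failed = page_full;
        page_full = false; // next_page(): switch pages, then retry the same insn_idx
        if failed {
            continue; // roll back label state and retry the current instruction
        }
        emitted.push(*insn);
        insn_idx += 1;
    }
    assert_eq!(emitted, ["lea", "jmp"]); // "lea" was emitted on the second attempt
}
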
@@ -1020,21 +1041,23 @@ impl Assembler
assert!(label_idx == idx);
}

let start_write_pos = cb.get_write_pos();
let start_ptr = cb.get_write_ptr();
let gc_offsets = asm.arm64_emit(cb);

if !cb.has_dropped_bytes() {
if cb.has_dropped_bytes() {
cb.clear_labels();
} else {
cb.link_labels();
}

// Invalidate icache for newly written out region so we don't run stale code.
#[cfg(not(test))]
{
let start = cb.get_ptr(start_write_pos).raw_ptr();
let write_ptr = cb.get_write_ptr().raw_ptr();
let codeblock_end = cb.get_ptr(cb.get_mem_size()).raw_ptr();
let end = std::cmp::min(write_ptr, codeblock_end);
unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
// Invalidate icache for newly written out region so we don't run stale code.
// It should invalidate only the code ranges of the current cb because the code
// ranges of the other cb might have a memory region that is still PROT_NONE.
#[cfg(not(test))]
cb.without_page_end_reserve(|cb| {
for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
}
});
}

gc_offsets
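
Illustrative sketch of the per-page range walk behind writable_addrs(), which is why the icache flush above only touches the current block's halves of each page (the other block's halves may still be PROT_NONE). Page size and bounds are assumed values for the demo:

// Walk a write span page by page and keep only the current block's half of
// each page, assuming 16 KiB pages and an inline half ending at 8172.
fn writable_addrs(mut start: usize, end: usize, page_size: usize, page_start: usize, page_end: usize) -> Vec<(usize, usize)> {
    let mut addrs = vec![];
    while start < end {
        let current_page = start / page_size * page_size;
        addrs.push((start, end.min(current_page + page_end)));
        start = current_page + page_size + page_start;
    }
    addrs
}

fn main() {
    let (page_size, page_start, page_end) = (16 * 1024, 0, 8 * 1024 - 20);
    // A write span crossing from page 0 into page 1 yields two inline ranges.
    let ranges = writable_addrs(100, 16 * 1024 + 500, page_size, page_start, page_end);
    assert_eq!(ranges, vec![(100, 8172), (16384, 16884)]);
}
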
@@ -5,6 +5,7 @@
use std::cell::Cell;
use std::fmt;
use std::convert::From;
use std::io::Write;
use std::mem::take;
use crate::cruby::{VALUE};
use crate::virtualmem::{CodePtr};
@@ -433,9 +434,9 @@ pub enum Insn {
// binary OR operation.
Or { left: Opnd, right: Opnd, out: Opnd },

/// Pad nop instructions to accommodate Op::Jmp in case the block is
/// invalidated.
PadEntryExit,
/// Pad nop instructions to accommodate Op::Jmp in case the block or the insn
/// is invalidated.
PadInvalPatch,

// Mark a position in the generated code
PosMarker(PosMarkerFn),
@@ -521,7 +522,7 @@ impl Insn {
Insn::Mov { .. } => "Mov",
Insn::Not { .. } => "Not",
Insn::Or { .. } => "Or",
Insn::PadEntryExit => "PadEntryExit",
Insn::PadInvalPatch => "PadInvalPatch",
Insn::PosMarker(_) => "PosMarker",
Insn::RShift { .. } => "RShift",
Insn::Store { .. } => "Store",
@@ -658,7 +659,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::Jz(_) |
Insn::Label(_) |
Insn::LeaLabel { .. } |
Insn::PadEntryExit |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
@@ -755,7 +756,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::Jz(_) |
Insn::Label(_) |
Insn::LeaLabel { .. } |
Insn::PadEntryExit |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
Insn::CPush(opnd) |
@@ -1474,8 +1475,8 @@ impl Assembler {
out
}

pub fn pad_entry_exit(&mut self) {
self.push_insn(Insn::PadEntryExit);
pub fn pad_inval_patch(&mut self) {
self.push_insn(Insn::PadInvalPatch);
}

//pub fn pos_marker<F: FnMut(CodePtr)>(&mut self, marker_fn: F)
@@ -231,7 +231,7 @@ fn test_jcc_ptr()
{
let (mut asm, mut cb) = setup_asm();

let side_exit = Target::CodePtr((5 as *mut u8).into());
let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
asm.test(
Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
@@ -248,7 +248,7 @@ fn test_jmp_ptr()
{
let (mut asm, mut cb) = setup_asm();

let stub = Target::CodePtr((5 as *mut u8).into());
let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
asm.jmp(stub);

asm.compile_with_num_regs(&mut cb, 0);
@@ -259,7 +259,7 @@ fn test_jo()
{
let (mut asm, mut cb) = setup_asm();

let side_exit = Target::CodePtr((5 as *mut u8).into());
let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());

let arg1 = Opnd::mem(64, SP, 0);
let arg0 = Opnd::mem(64, SP, 8);
@@ -9,6 +9,7 @@ use crate::asm::x86_64::*;
use crate::codegen::{JITState};
use crate::cruby::*;
use crate::backend::ir::*;
use crate::codegen::CodegenGlobals;

// Use the x86 register type for this platform
pub type Reg = X86Reg;
@@ -32,6 +33,9 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
pub const C_RET_REG: Reg = RAX_REG;
pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);

// The number of bytes that are generated by jmp_ptr
pub const JMP_PTR_BYTES: usize = 6;

/// Map Opnd to X86Opnd
impl From<Opnd> for X86Opnd {
fn from(opnd: Opnd) -> Self {
@@ -375,7 +379,13 @@ impl Assembler

// For each instruction
let start_write_pos = cb.get_write_pos();
for insn in &self.insns {
let mut insns_idx: usize = 0;
while let Some(insn) = self.insns.get(insns_idx) {
let src_ptr = cb.get_write_ptr();
let had_dropped_bytes = cb.has_dropped_bytes();
let old_label_state = cb.get_label_state();
let mut insn_gc_offsets: Vec<u32> = Vec::new();

match insn {
Insn::Comment(text) => {
if cfg!(feature = "asm_comments") {
@@ -461,7 +471,7 @@ impl Assembler
if !val.special_const_p() {
// The pointer immediate is encoded as the last part of the mov written out
let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
gc_offsets.push(ptr_offset);
insn_gc_offsets.push(ptr_offset);
}
}
},
@@ -651,11 +661,10 @@ impl Assembler
emit_csel(cb, *truthy, *falsy, *out, cmovl);
}
Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
Insn::PadEntryExit => {
// We assume that our Op::Jmp usage that gets invalidated is <= 5
let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap();
if code_size < 5 {
nop(cb, 5 - code_size);
Insn::PadInvalPatch => {
let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
if code_size < JMP_PTR_BYTES {
nop(cb, (JMP_PTR_BYTES - code_size) as u32);
}
}

@@ -666,6 +675,15 @@ impl Assembler
#[allow(unreachable_patterns)]
_ => panic!("unsupported instruction passed to x86 backend: {:?}", insn)
};

// On failure, jump to the next page and retry the current insn
if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) {
// Reset cb states before retrying the current Insn
cb.set_label_state(old_label_state);
} else {
insns_idx += 1;
gc_offsets.append(&mut insn_gc_offsets);
}
}

gc_offsets
@@ -684,7 +702,9 @@ impl Assembler

let gc_offsets = asm.x86_emit(cb);

if !cb.has_dropped_bytes() {
if cb.has_dropped_bytes() {
cb.clear_labels();
} else {
cb.link_labels();
}
@@ -13,13 +13,15 @@ use crate::utils::*;
use CodegenStatus::*;
use InsnOpnd::*;

use std::cell::RefCell;
use std::cell::RefMut;
use std::cmp;
use std::collections::HashMap;
use std::ffi::CStr;
use std::mem::{self, size_of};
use std::os::raw::c_uint;
use std::ptr;
use std::rc::Rc;
use std::slice;

pub use crate::virtualmem::CodePtr;
@@ -296,6 +298,7 @@ fn jit_prepare_routine_call(
/// Record the current codeblock write position for rewriting into a jump into
/// the outlined block later. Used to implement global code invalidation.
fn record_global_inval_patch(asm: &mut Assembler, outline_block_target_pos: CodePtr) {
asm.pad_inval_patch();
asm.pos_marker(move |code_ptr| {
CodegenGlobals::push_global_inval_patch(code_ptr, outline_block_target_pos);
});
@@ -606,19 +609,6 @@ fn gen_pc_guard(asm: &mut Assembler, iseq: IseqPtr, insn_idx: u32) {
/// Compile an interpreter entry block to be inserted into an iseq
/// Returns None if compilation fails.
pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> {
const MAX_PROLOGUE_SIZE: usize = 1024;

// Check if we have enough executable memory
if !cb.has_capacity(MAX_PROLOGUE_SIZE) {
return None;
}

let old_write_pos = cb.get_write_pos();

// TODO: figure out if this is actually beneficial for performance
// Align the current write position to cache line boundaries
cb.align_pos(64);

let code_ptr = cb.get_write_ptr();

let mut asm = Assembler::new();
@@ -660,10 +650,11 @@ pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> O

asm.compile(cb);

// Verify MAX_PROLOGUE_SIZE
assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE);

return Some(code_ptr);
if cb.has_dropped_bytes() {
None
} else {
Some(code_ptr)
}
}

// Generate code to check for interrupts and take a side-exit.
@@ -853,7 +844,7 @@ pub fn gen_single_block(
{
let mut block = jit.block.borrow_mut();
if block.entry_exit.is_some() {
asm.pad_entry_exit();
asm.pad_inval_patch();
}

// Compile code into the code block
@@ -6544,29 +6535,13 @@ static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None;
impl CodegenGlobals {
/// Initialize the codegen globals
pub fn init() {
// Executable memory size in MiB
let mem_size = get_option!(exec_mem_size) * 1024 * 1024;
// Executable memory and code page size in bytes
let mem_size = get_option!(exec_mem_size);
let code_page_size = get_option!(code_page_size);

#[cfg(not(test))]
let (mut cb, mut ocb) = {
// TODO(alan): we can error more gracefully when the user gives
// --yjit-exec-mem=absurdly-large-number
//
// 2 GiB. It's likely a bug if we generate this much code.
const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024;
assert!(mem_size <= MAX_BUFFER_SIZE);
let mem_size_u32 = mem_size as u32;
let half_size = mem_size / 2;

let page_size = unsafe { rb_yjit_get_page_size() };
let assert_page_aligned = |ptr| assert_eq!(
0,
ptr as usize % page_size.as_usize(),
"Start of virtual address block should be page-aligned",
);

let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) };
let second_half = virt_block.wrapping_add(half_size);
let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };

// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
// `virt_block` is page-aligned, `second_half` should be page-aligned as long as the
@@ -6575,26 +6550,25 @@ impl CodegenGlobals {
//
// Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
// (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
assert_page_aligned(virt_block);
assert_page_aligned(second_half);
let page_size = unsafe { rb_yjit_get_page_size() };
assert_eq!(
virt_block as usize % page_size.as_usize(), 0,
"Start of virtual address block should be page-aligned",
);
assert_eq!(code_page_size % page_size.as_usize(), 0, "code_page_size was not page-aligned");

use crate::virtualmem::*;

let first_half = VirtualMem::new(
let mem_block = VirtualMem::new(
SystemAllocator {},
page_size,
virt_block,
half_size
);
let second_half = VirtualMem::new(
SystemAllocator {},
page_size,
second_half,
half_size
mem_size,
);
let mem_block = Rc::new(RefCell::new(mem_block));

let cb = CodeBlock::new(first_half, false);
let ocb = OutlinedCb::wrap(CodeBlock::new(second_half, true));
let cb = CodeBlock::new(mem_block.clone(), code_page_size, false);
let ocb = OutlinedCb::wrap(CodeBlock::new(mem_block, code_page_size, true));

(cb, ocb)
};
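
Toy illustration (not part of the commit) of the ownership change above: the inline and outlined blocks now share one VirtualMem through Rc<RefCell<...>> instead of owning two disjoint halves. A Vec<u8> stands in for VirtualMem:

// Two block handles writing into one shared buffer, the same sharing shape
// that Rc<RefCell<VirtualMem>> gives cb and ocb.
use std::cell::RefCell;
use std::rc::Rc;

struct Block { mem: Rc<RefCell<Vec<u8>>>, write_pos: usize }

impl Block {
    fn write_byte(&mut self, byte: u8) {
        self.mem.borrow_mut()[self.write_pos] = byte;
        self.write_pos += 1;
    }
}

fn main() {
    let mem = Rc::new(RefCell::new(vec![0u8; 32]));
    let mut cb = Block { mem: Rc::clone(&mem), write_pos: 0 };   // inline half of a tiny page
    let mut ocb = Block { mem: Rc::clone(&mem), write_pos: 16 }; // outlined half of the same page
    cb.write_byte(0xAA);
    ocb.write_byte(0xBB);
    assert_eq!((mem.borrow()[0], mem.borrow()[16]), (0xAA, 0xBB));
}
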
@@ -6702,6 +6676,10 @@
unsafe { CODEGEN_GLOBALS.as_mut().unwrap() }
}

pub fn has_instance() -> bool {
unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
}

/// Get a mutable reference to the inline code block
pub fn get_inline_cb() -> &'static mut CodeBlock {
&mut CodegenGlobals::get_instance().inline_cb
@@ -665,7 +665,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
if new_addr != object {
for (byte_idx, &byte) in new_addr.as_u64().to_le_bytes().iter().enumerate() {
let byte_code_ptr = value_code_ptr.add_bytes(byte_idx);
cb.get_mem().write_byte(byte_code_ptr, byte)
cb.write_mem(byte_code_ptr, byte)
.expect("patching existing code should be within bounds");
}
}
@@ -1916,7 +1916,9 @@ pub fn gen_branch(

// Call the branch generation function
asm.mark_branch_start(&branchref);
gen_fn(asm, branch.dst_addrs[0].unwrap(), branch.dst_addrs[1], BranchShape::Default);
if let Some(dst_addr) = branch.dst_addrs[0] {
gen_fn(asm, dst_addr, branch.dst_addrs[1], BranchShape::Default);
}
asm.mark_branch_end(&branchref);
}

@@ -1955,6 +1957,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu
branch.shape = BranchShape::Default;

// Call the branch generation function
asm.comment("gen_direct_jmp: existing block");
asm.mark_branch_start(&branchref);
gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
asm.mark_branch_end(&branchref);
@@ -1965,6 +1968,7 @@ pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, asm: &mu
branch.shape = BranchShape::Next0;

// The branch is effectively empty (a noop)
asm.comment("gen_direct_jmp: fallthrough");
asm.mark_branch_start(&branchref);
asm.mark_branch_end(&branchref);
}
@@ -2003,7 +2007,9 @@ pub fn defer_compilation(

// Call the branch generation function
asm.mark_branch_start(&branch_rc);
gen_jump_branch(asm, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
if let Some(dst_addr) = branch.dst_addrs[0] {
gen_jump_branch(asm, dst_addr, None, BranchShape::Default);
}
asm.mark_branch_end(&branch_rc);
}
@@ -4,9 +4,14 @@ use std::ffi::CStr;
#[derive(Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct Options {
// Size of the executable memory block to allocate in MiB
// Size of the executable memory block to allocate in bytes
// Note that the command line argument is expressed in MiB and not bytes
pub exec_mem_size: usize,

// Size of each executable memory code page in bytes
// Note that the command line argument is expressed in KiB and not bytes
pub code_page_size: usize,

// Number of method calls after which to start generating code
// Threshold==1 means compile on first execution
pub call_threshold: usize,
@@ -48,7 +53,8 @@ pub struct Options {

// Initialize the options to default values
pub static mut OPTIONS: Options = Options {
exec_mem_size: 256,
exec_mem_size: 256 * 1024 * 1024,
code_page_size: 16 * 1024,
call_threshold: 10,
greedy_versioning: false,
no_type_prop: false,
@@ -118,8 +124,30 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
match (opt_name, opt_val) {
("", "") => (), // Simply --yjit

("exec-mem-size", _) => match opt_val.parse() {
Ok(n) => unsafe { OPTIONS.exec_mem_size = n },
("exec-mem-size", _) => match opt_val.parse::<usize>() {
Ok(n) => {
if n == 0 || n > 2 * 1024 * 1024 {
return None
}

// Convert from MiB to bytes internally for convenience
unsafe { OPTIONS.exec_mem_size = n * 1024 * 1024 }
}
Err(_) => {
return None;
}
},

("code-page-size", _) => match opt_val.parse::<usize>() {
Ok(n) => {
// Enforce bounds checks and that n is divisible by 4KiB
if n < 4 || n > 256 || n % 4 != 0 {
return None
}

// Convert from KiB to bytes internally for convenience
unsafe { OPTIONS.code_page_size = n * 1024 }
}
Err(_) => {
return None;
}
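
Illustrative sketch of the unit handling above: exec-mem-size is given in MiB and code-page-size in KiB on the command line, but both are stored in bytes. parse_code_page_size is an invented helper mirroring the bounds in the diff (4..=256 KiB, multiple of 4 KiB):

// Convert the KiB command-line value to bytes, rejecting out-of-range or
// non-4-KiB-aligned sizes, as the option parsing above does.
fn parse_code_page_size(kib: usize) -> Option<usize> {
    if kib < 4 || kib > 256 || kib % 4 != 0 {
        return None;
    }
    Some(kib * 1024) // stored in bytes
}

fn main() {
    assert_eq!(parse_code_page_size(16), Some(16 * 1024)); // the default: 16 KiB pages
    assert_eq!(parse_code_page_size(6), None);             // not a multiple of 4 KiB
    let exec_mem_mib = 256;
    assert_eq!(exec_mem_mib * 1024 * 1024, 268_435_456usize); // 256 MiB stored in bytes
}
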
@@ -74,14 +74,13 @@ pub(crate) use offset_of;
// This should work fine on ASCII strings and anything else
// that is considered legal UTF-8, including embedded nulls.
fn ruby_str_to_rust(v: VALUE) -> String {
// Make sure the CRuby encoding is UTF-8 compatible
let encoding = unsafe { rb_ENCODING_GET(v) } as u32;
assert!(encoding == RUBY_ENCINDEX_ASCII_8BIT || encoding == RUBY_ENCINDEX_UTF_8 || encoding == RUBY_ENCINDEX_US_ASCII);

let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
String::from_utf8(str_slice.to_vec()).unwrap() // does utf8 validation
match String::from_utf8(str_slice.to_vec()) {
Ok(utf8) => utf8,
Err(_) => String::new(),
}
}

// Location is the file defining the method, colon, method name.