YJIT: Pass method arguments using registers (#11280)

* YJIT: Pass method arguments using registers

* s/at_current_insn/at_compile_target/

* Implement register shuffle
Takashi Kokubun 2024-08-27 17:04:43 -07:00 committed by GitHub
parent f2ac013009
commit 5b129c899a
Notes: git 2024-08-28 00:05:03 +00:00
Merged-By: k0kubun <takashikkbn@gmail.com>
4 changed files with 330 additions and 124 deletions


@ -2256,6 +2256,34 @@ assert_equal '7', %q{
foo(5,2)
}
# regression test for argument registers with invalidation
assert_equal '[0, 1, 2]', %q{
def test(n)
ret = n
binding
ret
end
[0, 1, 2].map do |n|
test(n)
end
}
# regression test for argument registers
assert_equal 'true', %q{
class Foo
def ==(other)
other == nil
end
end
def test
[Foo.new].include?(Foo.new)
end
test
}
# test pattern matching
assert_equal '[:ok, :ok]', %q{
class C


@ -1032,7 +1032,7 @@ pub struct Assembler {
pub ctx: Context,
/// The current ISEQ's local table size. asm.local_opnd() uses this, and it's
/// sometimes hard to pass this value, e.g. asm.spill_temps() in asm.ccall().
/// sometimes hard to pass this value, e.g. asm.spill_regs() in asm.ccall().
///
/// `None` means we're not assembling for an ISEQ, or that the local size is
/// not relevant.
@ -1241,8 +1241,36 @@ impl Assembler
self.ctx.clear_local_types();
}
/// Repurpose stack temp registers to the corresponding locals for arguments
pub fn map_temp_regs_to_args(&mut self, callee_ctx: &mut Context, argc: i32) -> Vec<RegOpnd> {
let mut callee_reg_mapping = callee_ctx.get_reg_mapping();
let mut mapped_temps = vec![];
for arg_idx in 0..argc {
let stack_idx: u8 = (self.ctx.get_stack_size() as i32 - argc + arg_idx).try_into().unwrap();
let temp_opnd = RegOpnd::Stack(stack_idx);
// For each argument, if the stack temp for it has a register,
// let the callee use the register for the local variable.
if let Some(reg_idx) = self.ctx.get_reg_mapping().get_reg(temp_opnd) {
let local_opnd = RegOpnd::Local(arg_idx.try_into().unwrap());
callee_reg_mapping.set_reg(local_opnd, reg_idx);
mapped_temps.push(temp_opnd);
}
}
asm_comment!(self, "local maps: {:?}", callee_reg_mapping);
callee_ctx.set_reg_mapping(callee_reg_mapping);
mapped_temps
}
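
A minimal standalone sketch of the handoff above, for illustration only (ArgRegOpnd and map_args are hypothetical stand-ins, not the YJIT types): with a caller stack of size 3 and argc = 2, the arguments live at Stack(1) and Stack(2), and any register already holding one of them is handed to the matching callee local.

#[derive(Clone, Copy, PartialEq, Debug)]
enum ArgRegOpnd { Stack(u8), Local(u8) } // stand-in for RegOpnd

// Model of map_temp_regs_to_args: caller_regs lists which stack temps currently hold registers
fn map_args(caller_regs: &[(ArgRegOpnd, usize)], stack_size: i32, argc: i32) -> Vec<(ArgRegOpnd, usize)> {
    let mut callee_regs = vec![];
    for arg_idx in 0..argc {
        // Arguments sit at the top of the caller's stack: stack_idx = stack_size - argc + arg_idx
        let stack_idx = (stack_size - argc + arg_idx) as u8;
        if let Some(&(_, reg_idx)) = caller_regs.iter().find(|&&(opnd, _)| opnd == ArgRegOpnd::Stack(stack_idx)) {
            // The callee reuses the same register index for its argument local `arg_idx`
            callee_regs.push((ArgRegOpnd::Local(arg_idx as u8), reg_idx));
        }
    }
    callee_regs
}

fn main() {
    // Caller: stack size 3, argc 2, and only Stack(1) is currently held in register 0
    let caller = [(ArgRegOpnd::Stack(1), 0)];
    // Prints [(Local(0), 0)]: the callee's first argument local reuses register 0
    println!("{:?}", map_args(&caller, 3, 2));
}
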
/// Spill all live registers to the stack
pub fn spill_regs(&mut self) {
self.spill_regs_except(&vec![]);
}
/// Spill all live registers except `ignored_temps` to the stack
pub fn spill_regs_except(&mut self, ignored_temps: &Vec<RegOpnd>) {
// Forget registers above the stack top
let mut reg_mapping = self.ctx.get_reg_mapping();
for stack_idx in self.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 {
@ -1250,36 +1278,46 @@ impl Assembler
}
self.set_reg_mapping(reg_mapping);
// Spill live stack temps
if self.ctx.get_reg_mapping() != RegMapping::default() {
asm_comment!(self, "spill_temps: {:?} -> {:?}", self.ctx.get_reg_mapping(), RegMapping::default());
// If no registers are in use, skip all checks
if self.ctx.get_reg_mapping() == RegMapping::default() {
return;
}
// Spill stack temps
// Collect stack temps to be spilled
let mut spilled_opnds = vec![];
for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
if reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx)) {
let reg_opnd = RegOpnd::Stack(stack_idx);
if !ignored_temps.contains(&reg_opnd) && reg_mapping.dealloc_reg(reg_opnd) {
let idx = self.ctx.get_stack_size() - 1 - stack_idx;
self.spill_temp(self.stack_opnd(idx.into()));
let spilled_opnd = self.stack_opnd(idx.into());
spilled_opnds.push(spilled_opnd);
reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
}
// Spill locals
// Collect locals to be spilled
for local_idx in 0..MAX_CTX_TEMPS as u8 {
if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
let ep_offset = first_local_ep_offset - local_idx as u32;
self.spill_temp(self.local_opnd(ep_offset));
let spilled_opnd = self.local_opnd(ep_offset);
spilled_opnds.push(spilled_opnd);
reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
}
// Spill stack temps and locals
if !spilled_opnds.is_empty() {
asm_comment!(self, "spill_regs: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping);
for &spilled_opnd in spilled_opnds.iter() {
self.spill_reg(spilled_opnd);
}
self.ctx.set_reg_mapping(reg_mapping);
}
// Every stack temp should have been spilled
assert_eq!(self.ctx.get_reg_mapping(), RegMapping::default());
}
/// Spill a stack temp from a register to the stack
fn spill_temp(&mut self, opnd: Opnd) {
fn spill_reg(&mut self, opnd: Opnd) {
assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);
// Use different RegMappings for dest and src operands
@ -1308,6 +1346,42 @@ impl Assembler
}
}
// Shuffle register moves, sometimes adding extra moves using SCRATCH_REG,
// so that they will not rewrite each other before they are used.
pub fn reorder_reg_moves(old_moves: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
// Return the index of a move whose destination is not used as a source if any.
fn find_safe_move(moves: &Vec<(Reg, Opnd)>) -> Option<usize> {
moves.iter().enumerate().find(|(_, &(dest_reg, _))| {
moves.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
}).map(|(index, _)| index)
}
// Remove moves whose source and destination are the same
let mut old_moves: Vec<(Reg, Opnd)> = old_moves.clone().into_iter()
.filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
let mut new_moves = vec![];
while old_moves.len() > 0 {
// Keep taking safe moves
while let Some(index) = find_safe_move(&old_moves) {
new_moves.push(old_moves.remove(index));
}
// No safe move. Load the source of one move into SCRATCH_REG, and
// then load SCRATCH_REG into the destination when it's safe.
if old_moves.len() > 0 {
// Make sure it's safe to use SCRATCH_REG
assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
// Move SCRATCH <- opnd, and delay reg <- SCRATCH
let (reg, opnd) = old_moves.remove(0);
new_moves.push((Assembler::SCRATCH_REG, opnd));
old_moves.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
}
}
new_moves
}
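
The interesting case above is a cycle, where no destination is safe to overwrite. A minimal standalone model of the same algorithm (plain u8 register ids and a hypothetical SCRATCH constant instead of the backend's Reg/Opnd types) shows how a swap is resolved:

#[derive(Clone, Copy, PartialEq, Debug)]
enum Src { Reg(u8), Imm(i64) } // stand-in for Opnd

const SCRATCH: u8 = 255; // stand-in for Assembler::SCRATCH_REG

fn reorder(moves: &[(u8, Src)]) -> Vec<(u8, Src)> {
    // Drop no-op moves whose source and destination are the same register
    let mut pending: Vec<(u8, Src)> =
        moves.iter().copied().filter(|&(dst, src)| src != Src::Reg(dst)).collect();
    let mut out = vec![];
    while !pending.is_empty() {
        // Keep emitting moves whose destination is not read by any remaining move
        while let Some(i) = pending.iter().position(|&(dst, _)| {
            pending.iter().all(|&(_, src)| src != Src::Reg(dst))
        }) {
            out.push(pending.remove(i));
        }
        // Anything left forms a cycle: break it by routing one source through SCRATCH
        if !pending.is_empty() {
            let (dst, src) = pending.remove(0);
            out.push((SCRATCH, src));
            pending.push((dst, Src::Reg(SCRATCH)));
        }
    }
    out
}

fn main() {
    // r2 gets an immediate (safe), while r0 <- r1 and r1 <- r0 form a swap cycle
    let moves = [(0, Src::Reg(1)), (1, Src::Reg(0)), (2, Src::Imm(7))];
    // Prints [(2, Imm(7)), (255, Reg(1)), (1, Reg(0)), (0, Reg(255))]: the safe move
    // first, then SCRATCH <- r1, r1 <- r0, r0 <- SCRATCH to resolve the cycle
    println!("{:?}", reorder(&moves));
}
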
/// Sets the out field on the various instructions that require allocated
/// registers because their output is used as the operand on a subsequent
/// instruction. This is our implementation of the linear scan algorithm.
@ -1353,42 +1427,6 @@ impl Assembler
}
}
// Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG,
// so that they will not rewrite each other before they are used.
fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
// Return the index of a move whose destination is not used as a source if any.
fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> {
c_args.iter().enumerate().find(|(_, &(dest_reg, _))| {
c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
}).map(|(index, _)| index)
}
// Remove moves whose source and destination are the same
let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter()
.filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
let mut moves = vec![];
while c_args.len() > 0 {
// Keep taking safe moves
while let Some(index) = find_safe_arg(&c_args) {
moves.push(c_args.remove(index));
}
// No safe move. Load the source of one move into SCRATCH_REG, and
// then load SCRATCH_REG into the destination when it's safe.
if c_args.len() > 0 {
// Make sure it's safe to use SCRATCH_REG
assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
// Move SCRATCH <- opnd, and delay reg <- SCRATCH
let (reg, opnd) = c_args.remove(0);
moves.push((Assembler::SCRATCH_REG, opnd));
c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
}
}
moves
}
// Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes.
fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
if shift_offset >= 0 {
@ -1564,7 +1602,7 @@ impl Assembler
if c_args.len() > 0 {
// Resolve C argument dependencies
let c_args_len = c_args.len() as isize;
let moves = reorder_c_args(&c_args.drain(..).into_iter().collect());
let moves = Self::reorder_reg_moves(&c_args.drain(..).into_iter().collect());
shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);
// Push batched C arguments
@ -1808,7 +1846,7 @@ impl Assembler {
// Mark all temps as not being in registers.
// Temps will be marked back as being in registers by cpop_all.
// We assume that cpush_all + cpop_all are used for C functions in utils.rs
// that don't require spill_temps for GC.
// that don't require spill_regs for GC.
self.set_reg_mapping(RegMapping::default());
}


@ -3,6 +3,7 @@
use crate::asm::*;
use crate::backend::ir::*;
use crate::backend::current::TEMP_REGS;
use crate::core::*;
use crate::cruby::*;
use crate::invariants::*;
@ -114,10 +115,13 @@ pub struct JITState<'a> {
/// Stack of symbol names for --yjit-perf
perf_stack: Vec<String>,
/// When true, this block is the first block compiled by gen_block_series().
first_block: bool,
}
impl<'a> JITState<'a> {
pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb) -> Self {
pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb, first_block: bool) -> Self {
JITState {
iseq: blockid.iseq,
starting_insn_idx: blockid.idx,
@ -140,6 +144,7 @@ impl<'a> JITState<'a> {
block_assumes_single_ractor: false,
perf_map: Rc::default(),
perf_stack: vec![],
first_block,
}
}
@ -212,9 +217,16 @@ impl<'a> JITState<'a> {
self.next_insn_idx() + insn_len(next_opcode) as u16
}
// Check if we are compiling the instruction at the stub PC
// Check if we are compiling the instruction at the stub PC with the target Context
// Meaning we are compiling the instruction that is next to execute
pub fn at_current_insn(&self) -> bool {
pub fn at_compile_target(&self) -> bool {
// If this is not the first block compiled by gen_block_series(),
// it might be compiling the same block again with a different Context.
// In that case, it should defer_compilation() and inspect the stack there.
if !self.first_block {
return false;
}
let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) };
ec_pc == self.pc
}
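
A sketch of the call-site pattern used by the gen_* functions in this diff: when the block is not being compiled at its execution point, codegen defers and re-specializes later, once runtime values can actually be peeked at.

// Inside a gen_* codegen function: specialize only when compiling the
// instruction that is about to execute; otherwise compile it lazily.
if !jit.at_compile_target() {
    defer_compilation(jit, asm);
    return Some(EndBlock);
}
// Now it is safe to peek at runtime values, e.g. the topmost stack value:
let comptime_recv = jit.peek_at_stack(&asm.ctx, 0);
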
@ -222,7 +234,7 @@ impl<'a> JITState<'a> {
// Peek at the nth topmost value on the Ruby stack.
// Returns the topmost value when n == 0.
pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE {
assert!(self.at_current_insn());
assert!(self.at_compile_target());
assert!(n < ctx.get_stack_size() as isize);
// Note: this does not account for ctx->sp_offset because
@ -241,7 +253,7 @@ impl<'a> JITState<'a> {
}
fn peek_at_local(&self, n: i32) -> VALUE {
assert!(self.at_current_insn());
assert!(self.at_compile_target());
let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) }
.try_into()
@ -257,7 +269,7 @@ impl<'a> JITState<'a> {
}
fn peek_at_block_handler(&self, level: u32) -> VALUE {
assert!(self.at_current_insn());
assert!(self.at_compile_target());
unsafe {
let ep = get_cfp_ep_level(self.get_cfp(), level);
@ -656,7 +668,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
}
// Only able to check types when at current insn
assert!(jit.at_current_insn());
assert!(jit.at_compile_target());
let self_val = jit.peek_at_self();
let self_val_type = Type::from(self_val);
@ -1172,6 +1184,7 @@ pub fn gen_single_block(
ec: EcPtr,
cb: &mut CodeBlock,
ocb: &mut OutlinedCb,
first_block: bool,
) -> Result<BlockRef, ()> {
// Limit the number of specialized versions for this block
let ctx = limit_block_versions(blockid, start_ctx);
@ -1195,7 +1208,7 @@ pub fn gen_single_block(
let mut insn_idx: IseqIdx = blockid.idx;
// Initialize a JIT state object
let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb);
let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb, first_block);
jit.iseq = blockid.iseq;
// Create a backend assembler instance
@ -1265,7 +1278,7 @@ pub fn gen_single_block(
}
// In debug mode, verify our existing assumption
if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_current_insn() {
if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_compile_target() {
verify_ctx(&jit, &asm.ctx);
}
@ -1508,7 +1521,7 @@ fn fuse_putobject_opt_ltlt(
if shift_amt > 63 || shift_amt < 0 {
return None;
}
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -1772,7 +1785,7 @@ fn gen_splatkw(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime hash operand
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -2146,7 +2159,7 @@ fn gen_expandarray(
let array_opnd = asm.stack_opnd(0);
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -2345,7 +2358,16 @@ fn gen_getlocal_generic(
// Load the local from the block
// val = *(vm_get_ep(GET_EP(), level) - idx);
let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
Opnd::mem(64, ep_opnd, offs)
let local_opnd = Opnd::mem(64, ep_opnd, offs);
// Write back an argument register to the stack. If the local variable
// is an argument, it might have an allocated register, but if this ISEQ
// is known to escape EP, the register shouldn't be used after this getlocal.
if level == 0 && asm.ctx.get_reg_mapping().get_reg(asm.local_opnd(ep_offset).reg_opnd()).is_some() {
asm.mov(local_opnd, asm.local_opnd(ep_offset));
}
local_opnd
};
// Write the local at SP
@ -2425,6 +2447,13 @@ fn gen_setlocal_generic(
asm.alloc_reg(local_opnd.reg_opnd());
(flags_opnd, local_opnd)
} else {
// Make sure getlocal doesn't read a stale register. If the local variable
// is an argument, it might have an allocated register, but if this ISEQ
// is known to escape EP, the register shouldn't be used after this setlocal.
if level == 0 {
asm.ctx.dealloc_reg(asm.local_opnd(ep_offset).reg_opnd());
}
// Load flags and the local for the level
let ep_opnd = gen_get_ep(asm, level);
let flags_opnd = Opnd::mem(
@ -2627,11 +2656,11 @@ fn gen_checkkeyword(
// The index of the keyword we want to check
let index: i64 = jit.get_arg(1).as_i64();
// Load environment pointer EP
let ep_opnd = gen_get_ep(asm, 0);
// VALUE kw_bits = *(ep - bits);
let bits_opnd = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * -bits_offset);
// `unspecified_bits` is a part of the local table. Therefore, we may allocate a register for
// that "local" when passing it as an argument. We must use such a register to avoid loading
// random bits from the stack if any. We assume that EP is not escaped as of entering a method
// with keyword arguments.
let bits_opnd = asm.local_opnd(bits_offset as u32);
// unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
// if ((b & (0x01 << idx))) {
@ -2846,7 +2875,7 @@ fn gen_getinstancevariable(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -2910,7 +2939,7 @@ fn gen_setinstancevariable(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -3221,7 +3250,7 @@ fn gen_definedivar(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize base on a runtime receiver
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -3550,7 +3579,7 @@ fn gen_equality_specialized(
return Some(true);
}
if !jit.at_current_insn() {
if !jit.at_compile_target() {
return None;
}
let comptime_a = jit.peek_at_stack(&asm.ctx, 1);
@ -3669,7 +3698,7 @@ fn gen_opt_aref(
}
// Defer compilation so we can specialize base on a runtime receiver
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -3770,7 +3799,7 @@ fn gen_opt_aset(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -4376,7 +4405,7 @@ fn gen_opt_case_dispatch(
// We'd hope that our jitted code will be sufficiently fast without the
// hash lookup, at least for small hashes, but it's worth revisiting this
// assumption in the future.
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -6433,14 +6462,6 @@ fn gen_push_frame(
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv);
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into());
if frame.iseq.is_some() {
// Spill stack temps to let the callee use them (must be done before changing the SP register)
asm.spill_regs();
// Saving SP before calculating ep avoids a dependency on a register
// However this must be done after referencing frame.recv, which may be SP-relative
asm.mov(SP, sp);
}
let ep = asm.sub(sp, SIZEOF_VALUE.into());
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep);
}
@ -7770,9 +7791,32 @@ fn gen_send_iseq(
pc: None, // We are calling into jitted code, which will set the PC as necessary
}));
// Create a context for the callee
let mut callee_ctx = Context::default();
// Transfer some stack temp registers to the callee's locals for arguments.
let mapped_temps = if !forwarding {
asm.map_temp_regs_to_args(&mut callee_ctx, argc)
} else {
// When forwarding, the callee's local table has only a callinfo,
// so we can't map the actual arguments to the callee's locals.
vec![]
};
// Spill stack temps and locals that are not used by the callee.
// This must be done before changing the SP register.
asm.spill_regs_except(&mapped_temps);
// Saving SP before calculating ep avoids a dependency on a register
// However this must be done after referencing frame.recv, which may be SP-relative
asm.mov(SP, callee_sp);
// Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
// We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall().
if get_option!(gen_stats) {
// Protect caller-saved registers in case they're used for arguments
asm.cpush_all();
// Assemble the ISEQ name string
let name_str = get_iseq_name(iseq);
@ -7781,6 +7825,7 @@ fn gen_send_iseq(
// Increment the counter for this cfunc
asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
asm.cpop_all();
}
// No need to set cfp->pc since the callee sets it whenever calling into routines
@ -7794,9 +7839,6 @@ fn gen_send_iseq(
idx: jit.next_insn_idx(),
};
// Create a context for the callee
let mut callee_ctx = Context::default();
// If the callee has :inline_block annotation and the callsite has a block ISEQ,
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
@ -7816,6 +7858,7 @@ fn gen_send_iseq(
callee_ctx.set_local_type(0, Type::Unknown)
}
// Set the receiver type in the callee's context
let recv_type = if captured_self {
Type::Unknown // we don't track the type information of captured->self for now
} else {
@ -7823,6 +7866,29 @@ fn gen_send_iseq(
};
callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
// Now that callee_ctx is prepared, discover a block that can be reused if we move some registers.
// If there's such a block, move registers accordingly to avoid creating a new block.
let blockid = BlockId { iseq, idx: start_pc_offset };
if !mapped_temps.is_empty() {
// Discover a block that has the same operands in different (or the same) registers
if let Some(block_ctx) = find_block_ctx_with_same_regs(blockid, &callee_ctx) {
// List pairs of moves for making the register mappings compatible
let mut moves = vec![];
for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
let new_reg = TEMP_REGS[block_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
moves.push((new_reg, Opnd::Reg(old_reg)));
}
// Shuffle them to break cycles and generate the moves
let moves = Assembler::reorder_reg_moves(&moves);
for (reg, opnd) in moves {
asm.load_into(Opnd::Reg(reg), opnd);
}
callee_ctx.set_reg_mapping(block_ctx.get_reg_mapping());
}
}
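
A hedged illustration of what the shuffle above emits (register indices are hypothetical): if callee_ctx holds Local(0) in TEMP_REGS[0] and Local(1) in TEMP_REGS[1] while the existing block version expects them swapped, the loop builds a two-move cycle and reorder_reg_moves() breaks it through SCRATCH_REG.

// Illustrative only: the move list built for a swapped pair of locals
let moves = vec![
    (TEMP_REGS[1], Opnd::Reg(TEMP_REGS[0])), // Local(0): new reg <- old reg
    (TEMP_REGS[0], Opnd::Reg(TEMP_REGS[1])), // Local(1): new reg <- old reg
];
// reorder_reg_moves() turns the cycle into:
//   SCRATCH_REG  <- TEMP_REGS[0]
//   TEMP_REGS[0] <- TEMP_REGS[1]
//   TEMP_REGS[1] <- SCRATCH_REG
for (reg, opnd) in Assembler::reorder_reg_moves(&moves) {
    asm.load_into(Opnd::Reg(reg), opnd);
}
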
// The callee might change locals through Kernel#binding and other means.
asm.clear_local_types();
@ -7856,10 +7922,7 @@ fn gen_send_iseq(
gen_direct_jump(
jit,
&callee_ctx,
BlockId {
iseq: iseq,
idx: start_pc_offset,
},
blockid,
asm,
);
@ -8541,7 +8604,7 @@ fn gen_send_general(
let mut flags = unsafe { vm_ci_flag(ci) };
// Defer compilation so we can specialize on class of receiver
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -9102,7 +9165,7 @@ fn gen_invokeblock_specialized(
asm: &mut Assembler,
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -9265,7 +9328,7 @@ fn gen_invokesuper_specialized(
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on class of receiver
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -9499,7 +9562,7 @@ fn gen_objtostring(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -9842,7 +9905,7 @@ fn gen_getblockparamproxy(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -10593,6 +10656,7 @@ mod tests {
cb.get_write_ptr(),
ptr::null(), // No execution context in tests. No peeking!
ocb,
true,
)
}


@ -371,6 +371,12 @@ impl RegMapping {
.map(|(reg_idx, _)| reg_idx)
}
/// Set a given operand to the register at a given index.
pub fn set_reg(&mut self, opnd: RegOpnd, reg_idx: usize) {
assert!(self.0[reg_idx].is_none());
self.0[reg_idx] = Some(opnd);
}
/// Allocate a register for a given operand if available.
/// Return true if self is updated.
pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool {
@ -435,6 +441,32 @@ impl RegMapping {
RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()),
}.map(|(index, _)| index)
}
/// Return a vector of RegOpnds that have an allocated register
pub fn get_reg_opnds(&self) -> Vec<RegOpnd> {
self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
}
/// Return TypeDiff::Compatible(diff) if dst's mapping can be reached from self by moving
/// registers `diff` times. Return TypeDiff::Incompatible if they hold different operands in registers.
pub fn diff(&self, dst: RegMapping) -> TypeDiff {
let src_opnds = self.get_reg_opnds();
let dst_opnds = dst.get_reg_opnds();
if src_opnds.len() != dst_opnds.len() {
return TypeDiff::Incompatible;
}
let mut diff = 0;
for &reg_opnd in src_opnds.iter() {
match (self.get_reg(reg_opnd), dst.get_reg(reg_opnd)) {
(Some(src_idx), Some(dst_idx)) => if src_idx != dst_idx {
diff += 1;
}
_ => return TypeDiff::Incompatible,
}
}
TypeDiff::Compatible(diff)
}
}
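
A small hedged example of the counting, using the APIs added in this file and assuming at least two allocatable registers: the same two operands held in swapped registers yield a diff of 2, since two moves reconcile the mappings.

// Illustrative only: the same two operands held in swapped registers
let mut src = RegMapping::default();
src.set_reg(RegOpnd::Stack(0), 0);
src.set_reg(RegOpnd::Local(1), 1);

let mut dst = RegMapping::default();
dst.set_reg(RegOpnd::Stack(0), 1);
dst.set_reg(RegOpnd::Local(1), 0);

match src.diff(dst) {
    TypeDiff::Compatible(moves) => assert_eq!(moves, 2), // two register moves needed
    TypeDiff::Incompatible => unreachable!(),
}
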
impl fmt::Debug for RegMapping {
@ -2080,9 +2112,8 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
}
}
/// Count the number of block versions matching a given blockid
/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
/// Count the number of block versions that match a given BlockId and part of a Context
fn get_num_versions(blockid: BlockId, ctx: &Context) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
@ -2094,9 +2125,14 @@ fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
.version_map
.get(insn_idx)
.map(|versions| {
versions.iter().filter(|&&version|
Context::decode(unsafe { version.as_ref() }.ctx).inline() == inlined
).count()
versions.iter().filter(|&&version| {
let version_ctx = Context::decode(unsafe { version.as_ref() }.ctx);
// Inline versions are counted separately towards MAX_INLINE_VERSIONS.
version_ctx.inline() == ctx.inline() &&
// find_block_versions() finds only blocks with compatible reg_mapping,
// so count only versions with compatible reg_mapping.
version_ctx.reg_mapping == ctx.reg_mapping
}).count()
})
.unwrap_or(0)
}
@ -2128,10 +2164,7 @@ pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
/// Retrieve a basic block version for an (iseq, idx) tuple
/// This will return None if no version is found
fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
let versions = match get_version_list(blockid) {
Some(versions) => versions,
None => return None,
};
let versions = get_version_list(blockid)?;
// Best match found
let mut best_version: Option<BlockRef> = None;
@ -2156,6 +2189,33 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
return best_version;
}
/// Basically find_block_version(), but allows a RegMapping incompatibility
/// that can be fixed by register moves, and returns the Context of the block found
pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<Context> {
let versions = get_version_list(blockid)?;
// Best match found
let mut best_ctx: Option<Context> = None;
let mut best_diff = usize::MAX;
// For each version matching the blockid
for blockref in versions.iter() {
let block = unsafe { blockref.as_ref() };
let block_ctx = Context::decode(block.ctx);
// Discover the best block that is compatible if we move registers
match ctx.diff_with_same_regs(&block_ctx) {
TypeDiff::Compatible(diff) if diff < best_diff => {
best_ctx = Some(block_ctx);
best_diff = diff;
}
_ => {}
}
}
best_ctx
}
/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
const MAX_INLINE_VERSIONS: usize = 1000;
@ -2166,7 +2226,7 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
return *ctx;
}
let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
let next_versions = get_num_versions(blockid, ctx) + 1;
let max_versions = if ctx.inline() {
MAX_INLINE_VERSIONS
} else {
@ -2782,8 +2842,24 @@ impl Context {
return TypeDiff::Compatible(diff);
}
/// Basically diff() but allows RegMapping incompatibility that can be fixed
/// by register moves.
pub fn diff_with_same_regs(&self, dst: &Context) -> TypeDiff {
// Prepare a Context with the same registers
let mut dst_with_same_regs = dst.clone();
dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());
// Diff registers and other stuff separately, and merge them
match (self.diff(&dst_with_same_regs), self.get_reg_mapping().diff(dst.get_reg_mapping())) {
(TypeDiff::Compatible(ctx_diff), TypeDiff::Compatible(reg_diff)) => {
TypeDiff::Compatible(ctx_diff + reg_diff)
}
_ => TypeDiff::Incompatible
}
}
pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> {
if jit.at_current_insn() {
if jit.at_compile_target() {
let comptime_recv = jit.peek_at_stack(self, 1);
let comptime_arg = jit.peek_at_stack(self, 0);
return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p());
@ -2955,7 +3031,7 @@ fn gen_block_series_body(
let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE);
// Generate code for the first block
let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?;
let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb, true).ok()?;
batch.push(first_block); // Keep track of this block version
// Add the block version to the VersionMap for this ISEQ
@ -2996,7 +3072,7 @@ fn gen_block_series_body(
// Generate new block using context from the last branch.
let requested_ctx = Context::decode(requested_ctx);
let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb);
let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb, false);
// If the block failed to compile
if result.is_err() {
@ -4312,7 +4388,7 @@ mod tests {
let cb = CodeBlock::new_dummy(1024);
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(1024));
let dumm_addr = cb.get_write_ptr();
let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb)
let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb, true)
.into_block(0, dumm_addr, dumm_addr, vec![]);
let _dropper = BlockDropper(block);