YJIT: Pass method arguments using registers (#11280)

* YJIT: Pass method arguments using registers

* s/at_current_insn/at_compile_target/

* Implement register shuffle
Takashi Kokubun 2024-08-27 17:04:43 -07:00 committed by GitHub
parent f2ac013009
commit 5b129c899a
GPG Key ID: B5690EEEBB952194
Notes: git 2024-08-28 00:05:03 +00:00
Merged-By: k0kubun <takashikkbn@gmail.com>
4 changed files with 330 additions and 124 deletions


@ -2256,6 +2256,34 @@ assert_equal '7', %q{
foo(5,2)
}
# regression test for argument registers with invalidation
assert_equal '[0, 1, 2]', %q{
def test(n)
ret = n
binding
ret
end
[0, 1, 2].map do |n|
test(n)
end
}
# regression test for argument registers
assert_equal 'true', %q{
class Foo
def ==(other)
other == nil
end
end
def test
[Foo.new].include?(Foo.new)
end
test
}
# test pattern matching
assert_equal '[:ok, :ok]', %q{
class C


@ -1032,7 +1032,7 @@ pub struct Assembler {
pub ctx: Context,
/// The current ISEQ's local table size. asm.local_opnd() uses this, and it's
/// sometimes hard to pass this value, e.g. asm.spill_regs() in asm.ccall().
///
/// `None` means we're not assembling for an ISEQ, or that the local size is
/// not relevant.
@ -1241,8 +1241,36 @@ impl Assembler
self.ctx.clear_local_types();
}
/// Repurpose stack temp registers to the corresponding locals for arguments
pub fn map_temp_regs_to_args(&mut self, callee_ctx: &mut Context, argc: i32) -> Vec<RegOpnd> {
let mut callee_reg_mapping = callee_ctx.get_reg_mapping();
let mut mapped_temps = vec![];
for arg_idx in 0..argc {
let stack_idx: u8 = (self.ctx.get_stack_size() as i32 - argc + arg_idx).try_into().unwrap();
let temp_opnd = RegOpnd::Stack(stack_idx);
// For each argument, if the stack temp for it has a register,
// let the callee use the register for the local variable.
if let Some(reg_idx) = self.ctx.get_reg_mapping().get_reg(temp_opnd) {
let local_opnd = RegOpnd::Local(arg_idx.try_into().unwrap());
callee_reg_mapping.set_reg(local_opnd, reg_idx);
mapped_temps.push(temp_opnd);
}
}
asm_comment!(self, "local maps: {:?}", callee_reg_mapping);
callee_ctx.set_reg_mapping(callee_reg_mapping);
mapped_temps
}
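To illustrate the idea outside of YJIT, here is a minimal standalone sketch; `Slot` and `map_args` are simplified stand-ins invented for this example, not YJIT's types. The caller's top `argc` stack temps become the callee's locals 0..argc, and any register already holding one of those temps is simply relabeled for the callee.

// Simplified stand-in for the mapping above, not YJIT's real types.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Slot { Stack(u8), Local(u8) }

fn map_args(caller_regs: &[Option<Slot>], stack_size: u8, argc: u8) -> Vec<Option<Slot>> {
    let mut callee_regs = vec![None; caller_regs.len()];
    for arg_idx in 0..argc {
        let temp = Slot::Stack(stack_size - argc + arg_idx);
        // If some register currently holds this argument temp,
        // let the callee see it as the corresponding local.
        if let Some(reg_idx) = caller_regs.iter().position(|&s| s == Some(temp)) {
            callee_regs[reg_idx] = Some(Slot::Local(arg_idx));
        }
    }
    callee_regs
}

fn main() {
    // Caller: stack size 3, reg0 holds Stack(1), reg1 holds Stack(2); two arguments.
    let caller = [Some(Slot::Stack(1)), Some(Slot::Stack(2))];
    let callee = map_args(&caller, 3, 2);
    // The callee starts with Local(0) in reg0 and Local(1) in reg1.
    assert_eq!(callee, vec![Some(Slot::Local(0)), Some(Slot::Local(1))]);
}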
/// Spill all live registers to the stack
pub fn spill_regs(&mut self) {
self.spill_regs_except(&vec![]);
}
/// Spill all live registers except `ignored_temps` to the stack
pub fn spill_regs_except(&mut self, ignored_temps: &Vec<RegOpnd>) {
// Forget registers above the stack top
let mut reg_mapping = self.ctx.get_reg_mapping();
for stack_idx in self.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 {
@ -1250,36 +1278,46 @@ impl Assembler
}
self.set_reg_mapping(reg_mapping);
// If no registers are in use, skip all checks
if self.ctx.get_reg_mapping() == RegMapping::default() {
return;
// Spill stack temps
for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
if reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx)) {
let idx = self.ctx.get_stack_size() - 1 - stack_idx;
self.spill_temp(self.stack_opnd(idx.into()));
}
}
// Spill locals
for local_idx in 0..MAX_CTX_TEMPS as u8 {
if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
let ep_offset = first_local_ep_offset - local_idx as u32;
self.spill_temp(self.local_opnd(ep_offset));
}
}
self.ctx.set_reg_mapping(reg_mapping);
}
// Collect stack temps to be spilled
let mut spilled_opnds = vec![];
for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
let reg_opnd = RegOpnd::Stack(stack_idx);
if !ignored_temps.contains(&reg_opnd) && reg_mapping.dealloc_reg(reg_opnd) {
let idx = self.ctx.get_stack_size() - 1 - stack_idx;
let spilled_opnd = self.stack_opnd(idx.into());
spilled_opnds.push(spilled_opnd);
reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
}
// Collect locals to be spilled
for local_idx in 0..MAX_CTX_TEMPS as u8 {
if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
let ep_offset = first_local_ep_offset - local_idx as u32;
let spilled_opnd = self.local_opnd(ep_offset);
spilled_opnds.push(spilled_opnd);
reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
}
// Spill stack temps and locals
if !spilled_opnds.is_empty() {
asm_comment!(self, "spill_regs: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping);
for &spilled_opnd in spilled_opnds.iter() {
self.spill_reg(spilled_opnd);
}
self.ctx.set_reg_mapping(reg_mapping);
}
}
/// Spill a stack temp from a register to the stack
fn spill_reg(&mut self, opnd: Opnd) {
assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);
// Use different RegMappings for dest and src operands
@ -1308,6 +1346,42 @@ impl Assembler
}
}
// Shuffle register moves, sometimes adding extra moves using SCRATCH_REG,
// so that they will not rewrite each other before they are used.
pub fn reorder_reg_moves(old_moves: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
// Return the index of a move whose destination is not used as a source if any.
fn find_safe_move(moves: &Vec<(Reg, Opnd)>) -> Option<usize> {
moves.iter().enumerate().find(|(_, &(dest_reg, _))| {
moves.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
}).map(|(index, _)| index)
}
// Remove moves whose source and destination are the same
let mut old_moves: Vec<(Reg, Opnd)> = old_moves.clone().into_iter()
.filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
let mut new_moves = vec![];
while old_moves.len() > 0 {
// Keep taking safe moves
while let Some(index) = find_safe_move(&old_moves) {
new_moves.push(old_moves.remove(index));
}
// No safe move. Load the source of one move into SCRATCH_REG, and
// then load SCRATCH_REG into the destination when it's safe.
if old_moves.len() > 0 {
// Make sure it's safe to use SCRATCH_REG
assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
// Move SCRATCH <- opnd, and delay reg <- SCRATCH
let (reg, opnd) = old_moves.remove(0);
new_moves.push((Assembler::SCRATCH_REG, opnd));
old_moves.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
}
}
new_moves
}
/// Sets the out field on the various instructions that require allocated
/// registers because their output is used as the operand on a subsequent
/// instruction. This is our implementation of the linear scan algorithm.
@ -1353,42 +1427,6 @@ impl Assembler
}
}
// Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG,
// so that they will not rewrite each other before they are used.
fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
// Return the index of a move whose destination is not used as a source if any.
fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> {
c_args.iter().enumerate().find(|(_, &(dest_reg, _))| {
c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
}).map(|(index, _)| index)
}
// Remove moves whose source and destination are the same
let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter()
.filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();
let mut moves = vec![];
while c_args.len() > 0 {
// Keep taking safe moves
while let Some(index) = find_safe_arg(&c_args) {
moves.push(c_args.remove(index));
}
// No safe move. Load the source of one move into SCRATCH_REG, and
// then load SCRATCH_REG into the destination when it's safe.
if c_args.len() > 0 {
// Make sure it's safe to use SCRATCH_REG
assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));
// Move SCRATCH <- opnd, and delay reg <- SCRATCH
let (reg, opnd) = c_args.remove(0);
moves.push((Assembler::SCRATCH_REG, opnd));
c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
}
}
moves
}
// Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes.
fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
if shift_offset >= 0 {
@ -1564,7 +1602,7 @@ impl Assembler
if c_args.len() > 0 {
// Resolve C argument dependencies
let c_args_len = c_args.len() as isize;
let moves = Self::reorder_reg_moves(&c_args.drain(..).into_iter().collect());
shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);
// Push batched C arguments
@ -1808,7 +1846,7 @@ impl Assembler {
// Mark all temps as not being in registers.
// Temps will be marked back as being in registers by cpop_all.
// We assume that cpush_all + cpop_all are used for C functions in utils.rs
// that don't require spill_regs for GC.
self.set_reg_mapping(RegMapping::default());
}


@ -3,6 +3,7 @@
use crate::asm::*;
use crate::backend::ir::*;
use crate::backend::current::TEMP_REGS;
use crate::core::*;
use crate::cruby::*;
use crate::invariants::*;
@ -114,10 +115,13 @@ pub struct JITState<'a> {
/// Stack of symbol names for --yjit-perf
perf_stack: Vec<String>,
/// When true, this block is the first block compiled by gen_block_series().
first_block: bool,
}
impl<'a> JITState<'a> {
pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb, first_block: bool) -> Self {
JITState {
iseq: blockid.iseq,
starting_insn_idx: blockid.idx,
@ -140,6 +144,7 @@ impl<'a> JITState<'a> {
block_assumes_single_ractor: false,
perf_map: Rc::default(),
perf_stack: vec![],
first_block,
}
}
@ -212,9 +217,16 @@ impl<'a> JITState<'a> {
self.next_insn_idx() + insn_len(next_opcode) as u16
}
// Check if we are compiling the instruction at the stub PC with the target Context
// Meaning we are compiling the instruction that is next to execute
pub fn at_compile_target(&self) -> bool {
// If this is not the first block compiled by gen_block_series(),
// it might be compiling the same block again with a different Context.
// In that case, it should defer_compilation() and inspect the stack there.
if !self.first_block {
return false;
}
let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) };
ec_pc == self.pc
}
@ -222,7 +234,7 @@ impl<'a> JITState<'a> {
// Peek at the nth topmost value on the Ruby stack.
// Returns the topmost value when n == 0.
pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE {
assert!(self.at_compile_target());
assert!(n < ctx.get_stack_size() as isize);
// Note: this does not account for ctx->sp_offset because
@ -241,7 +253,7 @@ impl<'a> JITState<'a> {
}
fn peek_at_local(&self, n: i32) -> VALUE {
assert!(self.at_compile_target());
let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) }
.try_into()
@ -257,7 +269,7 @@ impl<'a> JITState<'a> {
}
fn peek_at_block_handler(&self, level: u32) -> VALUE {
assert!(self.at_compile_target());
unsafe {
let ep = get_cfp_ep_level(self.get_cfp(), level);
@ -656,7 +668,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
}
// Only able to check types when at current insn
assert!(jit.at_compile_target());
let self_val = jit.peek_at_self();
let self_val_type = Type::from(self_val);
@ -1172,6 +1184,7 @@ pub fn gen_single_block(
ec: EcPtr,
cb: &mut CodeBlock,
ocb: &mut OutlinedCb,
first_block: bool,
) -> Result<BlockRef, ()> {
// Limit the number of specialized versions for this block
let ctx = limit_block_versions(blockid, start_ctx);
@ -1195,7 +1208,7 @@ pub fn gen_single_block(
let mut insn_idx: IseqIdx = blockid.idx;
// Initialize a JIT state object
let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb, first_block);
jit.iseq = blockid.iseq;
// Create a backend assembler instance
@ -1265,7 +1278,7 @@ pub fn gen_single_block(
}
// In debug mode, verify our existing assumption
if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_compile_target() {
verify_ctx(&jit, &asm.ctx);
}
@ -1508,7 +1521,7 @@ fn fuse_putobject_opt_ltlt(
if shift_amt > 63 || shift_amt < 0 {
return None;
}
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -1772,7 +1785,7 @@ fn gen_splatkw(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime hash operand
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -2146,7 +2159,7 @@ fn gen_expandarray(
let array_opnd = asm.stack_opnd(0);
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -2345,7 +2358,16 @@ fn gen_getlocal_generic(
// Load the local from the block
// val = *(vm_get_ep(GET_EP(), level) - idx);
let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
let local_opnd = Opnd::mem(64, ep_opnd, offs);
// Write back an argument register to the stack. If the local variable
// is an argument, it might have an allocated register, but if this ISEQ
// is known to escape EP, the register shouldn't be used after this getlocal.
if level == 0 && asm.ctx.get_reg_mapping().get_reg(asm.local_opnd(ep_offset).reg_opnd()).is_some() {
asm.mov(local_opnd, asm.local_opnd(ep_offset));
}
local_opnd
};
// Write the local at SP
@ -2425,6 +2447,13 @@ fn gen_setlocal_generic(
asm.alloc_reg(local_opnd.reg_opnd());
(flags_opnd, local_opnd)
} else {
// Make sure getlocal doesn't read a stale register. If the local variable
// is an argument, it might have an allocated register, but if this ISEQ
// is known to escape EP, the register shouldn't be used after this setlocal.
if level == 0 {
asm.ctx.dealloc_reg(asm.local_opnd(ep_offset).reg_opnd());
}
// Load flags and the local for the level
let ep_opnd = gen_get_ep(asm, level);
let flags_opnd = Opnd::mem(
@ -2627,11 +2656,11 @@ fn gen_checkkeyword(
// The index of the keyword we want to check
let index: i64 = jit.get_arg(1).as_i64();
// `unspecified_bits` is a part of the local table. Therefore, we may allocate a register for
// that "local" when passing it as an argument. We must use such a register to avoid loading
// random bits from the stack if any. We assume that EP is not escaped as of entering a method
// with keyword arguments.
let bits_opnd = asm.local_opnd(bits_offset as u32);
// unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
// if ((b & (0x01 << idx))) {
@ -2846,7 +2875,7 @@ fn gen_getinstancevariable(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -2910,7 +2939,7 @@ fn gen_setinstancevariable(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -3221,7 +3250,7 @@ fn gen_definedivar(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize base on a runtime receiver
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -3550,7 +3579,7 @@ fn gen_equality_specialized(
return Some(true);
}
if !jit.at_compile_target() {
return None;
}
let comptime_a = jit.peek_at_stack(&asm.ctx, 1);
@ -3669,7 +3698,7 @@ fn gen_opt_aref(
}
// Defer compilation so we can specialize base on a runtime receiver
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -3770,7 +3799,7 @@ fn gen_opt_aset(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -4376,7 +4405,7 @@ fn gen_opt_case_dispatch(
// We'd hope that our jitted code will be sufficiently fast without the
// hash lookup, at least for small hashes, but it's worth revisiting this
// assumption in the future.
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -6433,14 +6462,6 @@ fn gen_push_frame(
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv);
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into());
if frame.iseq.is_some() {
// Spill stack temps to let the callee use them (must be done before changing the SP register)
asm.spill_regs();
// Saving SP before calculating ep avoids a dependency on a register
// However this must be done after referencing frame.recv, which may be SP-relative
asm.mov(SP, sp);
}
let ep = asm.sub(sp, SIZEOF_VALUE.into());
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep);
}
@ -7770,9 +7791,32 @@ fn gen_send_iseq(
pc: None, // We are calling into jitted code, which will set the PC as necessary
}));
// Create a context for the callee
let mut callee_ctx = Context::default();
// Transfer some stack temp registers to the callee's locals for arguments.
let mapped_temps = if !forwarding {
asm.map_temp_regs_to_args(&mut callee_ctx, argc)
} else {
// When forwarding, the callee's local table has only a callinfo,
// so we can't map the actual arguments to the callee's locals.
vec![]
};
// Spill stack temps and locals that are not used by the callee.
// This must be done before changing the SP register.
asm.spill_regs_except(&mapped_temps);
// Saving SP before calculating ep avoids a dependency on a register
// However this must be done after referencing frame.recv, which may be SP-relative
asm.mov(SP, callee_sp);
// Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
// We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall().
if get_option!(gen_stats) {
// Protect caller-saved registers in case they're used for arguments
asm.cpush_all();
// Assemble the ISEQ name string
let name_str = get_iseq_name(iseq);
@ -7781,6 +7825,7 @@ fn gen_send_iseq(
// Increment the counter for this cfunc
asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
asm.cpop_all();
}
// No need to set cfp->pc since the callee sets it whenever calling into routines
@ -7794,9 +7839,6 @@ fn gen_send_iseq(
idx: jit.next_insn_idx(),
};
// Create a context for the callee
let mut callee_ctx = Context::default();
// If the callee has :inline_block annotation and the callsite has a block ISEQ,
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
@ -7816,6 +7858,7 @@ fn gen_send_iseq(
callee_ctx.set_local_type(0, Type::Unknown)
}
// Set the receiver type in the callee's context
let recv_type = if captured_self {
Type::Unknown // we don't track the type information of captured->self for now
} else {
@ -7823,6 +7866,29 @@ fn gen_send_iseq(
};
callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
// Now that callee_ctx is prepared, discover a block that can be reused if we move some registers.
// If there's such a block, move registers accordingly to avoid creating a new block.
let blockid = BlockId { iseq, idx: start_pc_offset };
if !mapped_temps.is_empty() {
// Discover a block that have the same things in different (or same) registers
if let Some(block_ctx) = find_block_ctx_with_same_regs(blockid, &callee_ctx) {
// List pairs of moves for making the register mappings compatible
let mut moves = vec![];
for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
let new_reg = TEMP_REGS[block_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
moves.push((new_reg, Opnd::Reg(old_reg)));
}
// Shuffle them to break cycles and generate the moves
let moves = Assembler::reorder_reg_moves(&moves);
for (reg, opnd) in moves {
asm.load_into(Opnd::Reg(reg), opnd);
}
callee_ctx.set_reg_mapping(block_ctx.get_reg_mapping());
}
}
// The callee might change locals through Kernel#binding and other means.
asm.clear_local_types();
@ -7856,10 +7922,7 @@ fn gen_send_iseq(
gen_direct_jump(
jit,
&callee_ctx,
blockid,
iseq: iseq,
idx: start_pc_offset,
},
asm,
);
@ -8541,7 +8604,7 @@ fn gen_send_general(
let mut flags = unsafe { vm_ci_flag(ci) };
// Defer compilation so we can specialize on class of receiver
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -9102,7 +9165,7 @@ fn gen_invokeblock_specialized(
asm: &mut Assembler,
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -9265,7 +9328,7 @@ fn gen_invokesuper_specialized(
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on class of receiver
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -9499,7 +9562,7 @@ fn gen_objtostring(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -9842,7 +9905,7 @@ fn gen_getblockparamproxy(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@ -10593,6 +10656,7 @@ mod tests {
cb.get_write_ptr(),
ptr::null(), // No execution context in tests. No peeking!
ocb,
true,
)
}


@ -371,6 +371,12 @@ impl RegMapping {
.map(|(reg_idx, _)| reg_idx)
}
/// Set a given operand to the register at a given index.
pub fn set_reg(&mut self, opnd: RegOpnd, reg_idx: usize) {
assert!(self.0[reg_idx].is_none());
self.0[reg_idx] = Some(opnd);
}
/// Allocate a register for a given operand if available.
/// Return true if self is updated.
pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool {
@ -435,6 +441,32 @@ impl RegMapping {
RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()),
}.map(|(index, _)| index)
}
/// Return a vector of RegOpnds that have an allocated register
pub fn get_reg_opnds(&self) -> Vec<RegOpnd> {
self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
}
/// Return TypeDiff::Compatible(diff) if dst has a mapping that can be made by moving registers
/// in self `diff` times. TypeDiff::Incompatible if they have different things in registers.
pub fn diff(&self, dst: RegMapping) -> TypeDiff {
let src_opnds = self.get_reg_opnds();
let dst_opnds = dst.get_reg_opnds();
if src_opnds.len() != dst_opnds.len() {
return TypeDiff::Incompatible;
}
let mut diff = 0;
for &reg_opnd in src_opnds.iter() {
match (self.get_reg(reg_opnd), dst.get_reg(reg_opnd)) {
(Some(src_idx), Some(dst_idx)) => if src_idx != dst_idx {
diff += 1;
}
_ => return TypeDiff::Incompatible,
}
}
TypeDiff::Compatible(diff)
}
}
impl fmt::Debug for RegMapping {
@ -2080,9 +2112,8 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
}
}
/// Count the number of block versions that match a given BlockId and part of a Context
fn get_num_versions(blockid: BlockId, ctx: &Context) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
@ -2094,9 +2125,14 @@ fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
.version_map
.get(insn_idx)
.map(|versions| {
versions.iter().filter(|&&version| {
let version_ctx = Context::decode(unsafe { version.as_ref() }.ctx);
// Inline versions are counted separately towards MAX_INLINE_VERSIONS.
version_ctx.inline() == ctx.inline() &&
// find_block_versions() finds only blocks with compatible reg_mapping,
// so count only versions with compatible reg_mapping.
version_ctx.reg_mapping == ctx.reg_mapping
}).count()
})
.unwrap_or(0)
}
@ -2128,10 +2164,7 @@ pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
/// Retrieve a basic block version for an (iseq, idx) tuple
/// This will return None if no version is found
fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
let versions = get_version_list(blockid)?;
Some(versions) => versions,
None => return None,
};
// Best match found
let mut best_version: Option<BlockRef> = None;
@ -2156,6 +2189,33 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
return best_version;
}
/// Basically find_block_version() but allows RegMapping incompatibility
/// that can be fixed by register moves and returns Context
pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<Context> {
let versions = get_version_list(blockid)?;
// Best match found
let mut best_ctx: Option<Context> = None;
let mut best_diff = usize::MAX;
// For each version matching the blockid
for blockref in versions.iter() {
let block = unsafe { blockref.as_ref() };
let block_ctx = Context::decode(block.ctx);
// Discover the best block that is compatible if we move registers
match ctx.diff_with_same_regs(&block_ctx) {
TypeDiff::Compatible(diff) if diff < best_diff => {
best_ctx = Some(block_ctx);
best_diff = diff;
}
_ => {}
}
}
best_ctx
}
/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
const MAX_INLINE_VERSIONS: usize = 1000;
@ -2166,7 +2226,7 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
return *ctx;
}
let next_versions = get_num_versions(blockid, ctx) + 1;
let max_versions = if ctx.inline() {
MAX_INLINE_VERSIONS
} else {
@ -2782,8 +2842,24 @@ impl Context {
return TypeDiff::Compatible(diff);
}
/// Basically diff() but allows RegMapping incompatibility that can be fixed
/// by register moves.
pub fn diff_with_same_regs(&self, dst: &Context) -> TypeDiff {
// Prepare a Context with the same registers
let mut dst_with_same_regs = dst.clone();
dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());
// Diff registers and other stuff separately, and merge them
match (self.diff(&dst_with_same_regs), self.get_reg_mapping().diff(dst.get_reg_mapping())) {
(TypeDiff::Compatible(ctx_diff), TypeDiff::Compatible(reg_diff)) => {
TypeDiff::Compatible(ctx_diff + reg_diff)
}
_ => TypeDiff::Incompatible
}
}
pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> {
if jit.at_compile_target() {
let comptime_recv = jit.peek_at_stack(self, 1);
let comptime_arg = jit.peek_at_stack(self, 0);
return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p());
@ -2955,7 +3031,7 @@ fn gen_block_series_body(
let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE);
// Generate code for the first block
let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb, true).ok()?;
batch.push(first_block); // Keep track of this block version
// Add the block version to the VersionMap for this ISEQ
@ -2996,7 +3072,7 @@ fn gen_block_series_body(
// Generate new block using context from the last branch.
let requested_ctx = Context::decode(requested_ctx);
let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb, false);
// If the block failed to compile
if result.is_err() {
@ -4312,7 +4388,7 @@ mod tests {
let cb = CodeBlock::new_dummy(1024);
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(1024));
let dumm_addr = cb.get_write_ptr();
let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb, true)
.into_block(0, dumm_addr, dumm_addr, vec![]);
let _dropper = BlockDropper(block);