YJIT: Spill/load argument registers to reuse blocks (#12287)
* YJIT: Spill/load argument registers to reuse blocks * Mention the immediate function name * Explain the context behind spill/load operations
This commit is contained in:
parent
93f8de777f
commit
cff031253f
Notes:
git
2024-12-09 18:02:58 +00:00
Merged-By: k0kubun <takashikkbn@gmail.com>
@ -1317,7 +1317,7 @@ impl Assembler
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Spill a stack temp from a register to the stack
|
/// Spill a stack temp from a register to the stack
|
||||||
fn spill_reg(&mut self, opnd: Opnd) {
|
pub fn spill_reg(&mut self, opnd: Opnd) {
|
||||||
assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);
|
assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);
|
||||||
|
|
||||||
// Use different RegMappings for dest and src operands
|
// Use different RegMappings for dest and src operands
|
||||||
|
@ -8136,53 +8136,16 @@ fn gen_send_iseq(
|
|||||||
pc: None, // We are calling into jitted code, which will set the PC as necessary
|
pc: None, // We are calling into jitted code, which will set the PC as necessary
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Create a context for the callee
|
|
||||||
let mut callee_ctx = Context::default();
|
|
||||||
|
|
||||||
// Transfer some stack temp registers to the callee's locals for arguments.
|
|
||||||
let mapped_temps = if !forwarding {
|
|
||||||
asm.map_temp_regs_to_args(&mut callee_ctx, argc)
|
|
||||||
} else {
|
|
||||||
// When forwarding, the callee's local table has only a callinfo,
|
|
||||||
// so we can't map the actual arguments to the callee's locals.
|
|
||||||
vec![]
|
|
||||||
};
|
|
||||||
|
|
||||||
// Spill stack temps and locals that are not used by the callee.
|
|
||||||
// This must be done before changing the SP register.
|
|
||||||
asm.spill_regs_except(&mapped_temps);
|
|
||||||
|
|
||||||
// Saving SP before calculating ep avoids a dependency on a register
|
|
||||||
// However this must be done after referencing frame.recv, which may be SP-relative
|
|
||||||
asm.mov(SP, callee_sp);
|
|
||||||
|
|
||||||
// Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
|
|
||||||
// We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall().
|
|
||||||
if get_option!(gen_stats) {
|
|
||||||
// Protect caller-saved registers in case they're used for arguments
|
|
||||||
asm.cpush_all();
|
|
||||||
|
|
||||||
// Assemble the ISEQ name string
|
|
||||||
let name_str = get_iseq_name(iseq);
|
|
||||||
|
|
||||||
// Get an index for this ISEQ name
|
|
||||||
let iseq_idx = get_iseq_idx(&name_str);
|
|
||||||
|
|
||||||
// Increment the counter for this cfunc
|
|
||||||
asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
|
|
||||||
asm.cpop_all();
|
|
||||||
}
|
|
||||||
|
|
||||||
// No need to set cfp->pc since the callee sets it whenever calling into routines
|
// No need to set cfp->pc since the callee sets it whenever calling into routines
|
||||||
// that could look at it through jit_save_pc().
|
// that could look at it through jit_save_pc().
|
||||||
// mov(cb, REG0, const_ptr_opnd(start_pc));
|
// mov(cb, REG0, const_ptr_opnd(start_pc));
|
||||||
// mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
|
// mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
|
||||||
|
|
||||||
// Stub so we can return to JITted code
|
// Create a blockid for the callee
|
||||||
let return_block = BlockId {
|
let callee_blockid = BlockId { iseq, idx: start_pc_offset };
|
||||||
iseq: jit.iseq,
|
|
||||||
idx: jit.next_insn_idx(),
|
// Create a context for the callee
|
||||||
};
|
let mut callee_ctx = Context::default();
|
||||||
|
|
||||||
// If the callee has :inline_block annotation and the callsite has a block ISEQ,
|
// If the callee has :inline_block annotation and the callsite has a block ISEQ,
|
||||||
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
|
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
|
||||||
@ -8211,29 +8174,92 @@ fn gen_send_iseq(
|
|||||||
};
|
};
|
||||||
callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
|
callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
|
||||||
|
|
||||||
// Now that callee_ctx is prepared, discover a block that can be reused if we move some registers.
|
// Spill or preserve argument registers
|
||||||
// If there's such a block, move registers accordingly to avoid creating a new block.
|
if forwarding {
|
||||||
let blockid = BlockId { iseq, idx: start_pc_offset };
|
// When forwarding, the callee's local table has only a callinfo,
|
||||||
if !mapped_temps.is_empty() {
|
// so we can't map the actual arguments to the callee's locals.
|
||||||
// Discover a block that have the same things in different (or same) registers
|
asm.spill_regs();
|
||||||
if let Some(block_ctx) = find_block_ctx_with_same_regs(blockid, &callee_ctx) {
|
} else {
|
||||||
// List pairs of moves for making the register mappings compatible
|
// Discover stack temp registers that can be used as the callee's locals
|
||||||
|
let mapped_temps = asm.map_temp_regs_to_args(&mut callee_ctx, argc);
|
||||||
|
|
||||||
|
// Spill stack temps and locals that are not used by the callee.
|
||||||
|
// This must be done before changing the SP register.
|
||||||
|
asm.spill_regs_except(&mapped_temps);
|
||||||
|
|
||||||
|
// If the callee block has been compiled before, spill/move registers to reuse the existing block
|
||||||
|
// for minimizing the number of blocks we need to compile.
|
||||||
|
if let Some(existing_reg_mapping) = find_most_compatible_reg_mapping(callee_blockid, &callee_ctx) {
|
||||||
|
asm_comment!(asm, "reuse maps: {:?} -> {:?}", callee_ctx.get_reg_mapping(), existing_reg_mapping);
|
||||||
|
|
||||||
|
// Spill the registers that are not used in the existing block.
|
||||||
|
// When the same ISEQ is compiled as an entry block, it starts with no registers allocated.
|
||||||
|
for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
|
||||||
|
if existing_reg_mapping.get_reg(reg_opnd).is_none() {
|
||||||
|
match reg_opnd {
|
||||||
|
RegOpnd::Local(local_idx) => {
|
||||||
|
let spilled_temp = asm.stack_opnd(argc - local_idx as i32 - 1);
|
||||||
|
asm.spill_reg(spilled_temp);
|
||||||
|
callee_ctx.dealloc_reg(reg_opnd);
|
||||||
|
}
|
||||||
|
RegOpnd::Stack(_) => unreachable!("callee {:?} should have been spilled", reg_opnd),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert!(callee_ctx.get_reg_mapping().get_reg_opnds().len() <= existing_reg_mapping.get_reg_opnds().len());
|
||||||
|
|
||||||
|
// Load the registers that are spilled in this block but used in the existing block.
|
||||||
|
// When there are multiple callsites, some registers spilled in this block may be used at other callsites.
|
||||||
|
for &reg_opnd in existing_reg_mapping.get_reg_opnds().iter() {
|
||||||
|
if callee_ctx.get_reg_mapping().get_reg(reg_opnd).is_none() {
|
||||||
|
match reg_opnd {
|
||||||
|
RegOpnd::Local(local_idx) => {
|
||||||
|
callee_ctx.alloc_reg(reg_opnd);
|
||||||
|
let loaded_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
|
||||||
|
let loaded_temp = asm.stack_opnd(argc - local_idx as i32 - 1);
|
||||||
|
asm.load_into(Opnd::Reg(loaded_reg), loaded_temp);
|
||||||
|
}
|
||||||
|
RegOpnd::Stack(_) => unreachable!("find_most_compatible_reg_mapping should not leave {:?}", reg_opnd),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert_eq!(callee_ctx.get_reg_mapping().get_reg_opnds().len(), existing_reg_mapping.get_reg_opnds().len());
|
||||||
|
|
||||||
|
// Shuffle registers to make the register mappings compatible
|
||||||
let mut moves = vec![];
|
let mut moves = vec![];
|
||||||
for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
|
for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
|
||||||
let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
|
let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
|
||||||
let new_reg = TEMP_REGS[block_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
|
let new_reg = TEMP_REGS[existing_reg_mapping.get_reg(reg_opnd).unwrap()];
|
||||||
moves.push((new_reg, Opnd::Reg(old_reg)));
|
moves.push((new_reg, Opnd::Reg(old_reg)));
|
||||||
}
|
}
|
||||||
|
for (reg, opnd) in Assembler::reorder_reg_moves(&moves) {
|
||||||
// Shuffle them to break cycles and generate the moves
|
|
||||||
let moves = Assembler::reorder_reg_moves(&moves);
|
|
||||||
for (reg, opnd) in moves {
|
|
||||||
asm.load_into(Opnd::Reg(reg), opnd);
|
asm.load_into(Opnd::Reg(reg), opnd);
|
||||||
}
|
}
|
||||||
callee_ctx.set_reg_mapping(block_ctx.get_reg_mapping());
|
callee_ctx.set_reg_mapping(existing_reg_mapping);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update SP register for the callee. This must be done after referencing frame.recv,
|
||||||
|
// which may be SP-relative.
|
||||||
|
asm.mov(SP, callee_sp);
|
||||||
|
|
||||||
|
// Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
|
||||||
|
// We also do this after spill_regs() to avoid doubly spilling the same thing on asm.ccall().
|
||||||
|
if get_option!(gen_stats) {
|
||||||
|
// Protect caller-saved registers in case they're used for arguments
|
||||||
|
asm.cpush_all();
|
||||||
|
|
||||||
|
// Assemble the ISEQ name string
|
||||||
|
let name_str = get_iseq_name(iseq);
|
||||||
|
|
||||||
|
// Get an index for this ISEQ name
|
||||||
|
let iseq_idx = get_iseq_idx(&name_str);
|
||||||
|
|
||||||
|
// Increment the counter for this cfunc
|
||||||
|
asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
|
||||||
|
asm.cpop_all();
|
||||||
|
}
|
||||||
|
|
||||||
// The callee might change locals through Kernel#binding and other means.
|
// The callee might change locals through Kernel#binding and other means.
|
||||||
asm.clear_local_types();
|
asm.clear_local_types();
|
||||||
|
|
||||||
@ -8246,6 +8272,12 @@ fn gen_send_iseq(
|
|||||||
return_asm.ctx.reset_chain_depth_and_defer();
|
return_asm.ctx.reset_chain_depth_and_defer();
|
||||||
return_asm.ctx.set_as_return_landing();
|
return_asm.ctx.set_as_return_landing();
|
||||||
|
|
||||||
|
// Stub so we can return to JITted code
|
||||||
|
let return_block = BlockId {
|
||||||
|
iseq: jit.iseq,
|
||||||
|
idx: jit.next_insn_idx(),
|
||||||
|
};
|
||||||
|
|
||||||
// Write the JIT return address on the callee frame
|
// Write the JIT return address on the callee frame
|
||||||
jit.gen_branch(
|
jit.gen_branch(
|
||||||
asm,
|
asm,
|
||||||
@ -8266,7 +8298,7 @@ fn gen_send_iseq(
|
|||||||
gen_direct_jump(
|
gen_direct_jump(
|
||||||
jit,
|
jit,
|
||||||
&callee_ctx,
|
&callee_ctx,
|
||||||
blockid,
|
callee_blockid,
|
||||||
asm,
|
asm,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -447,25 +447,9 @@ impl RegMapping {
|
|||||||
self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
|
self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return TypeDiff::Compatible(diff) if dst has a mapping that can be made by moving registers
|
/// Count the number of registers that store a different operand from `dst`.
|
||||||
/// in self `diff` times. TypeDiff::Incompatible if they have different things in registers.
|
pub fn diff(&self, dst: RegMapping) -> usize {
|
||||||
pub fn diff(&self, dst: RegMapping) -> TypeDiff {
|
self.0.iter().enumerate().filter(|&(reg_idx, &reg)| reg != dst.0[reg_idx]).count()
|
||||||
let src_opnds = self.get_reg_opnds();
|
|
||||||
let dst_opnds = dst.get_reg_opnds();
|
|
||||||
if src_opnds.len() != dst_opnds.len() {
|
|
||||||
return TypeDiff::Incompatible;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut diff = 0;
|
|
||||||
for &reg_opnd in src_opnds.iter() {
|
|
||||||
match (self.get_reg(reg_opnd), dst.get_reg(reg_opnd)) {
|
|
||||||
(Some(src_idx), Some(dst_idx)) => if src_idx != dst_idx {
|
|
||||||
diff += 1;
|
|
||||||
}
|
|
||||||
_ => return TypeDiff::Incompatible,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
TypeDiff::Compatible(diff)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2240,13 +2224,12 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
|
|||||||
return best_version;
|
return best_version;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Basically find_block_version() but allows RegMapping incompatibility
|
/// Find the closest RegMapping among ones that have already been compiled.
|
||||||
/// that can be fixed by register moves and returns Context
|
pub fn find_most_compatible_reg_mapping(blockid: BlockId, ctx: &Context) -> Option<RegMapping> {
|
||||||
pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<Context> {
|
|
||||||
let versions = get_version_list(blockid)?;
|
let versions = get_version_list(blockid)?;
|
||||||
|
|
||||||
// Best match found
|
// Best match found
|
||||||
let mut best_ctx: Option<Context> = None;
|
let mut best_mapping: Option<RegMapping> = None;
|
||||||
let mut best_diff = usize::MAX;
|
let mut best_diff = usize::MAX;
|
||||||
|
|
||||||
// For each version matching the blockid
|
// For each version matching the blockid
|
||||||
@ -2254,17 +2237,17 @@ pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<
|
|||||||
let block = unsafe { blockref.as_ref() };
|
let block = unsafe { blockref.as_ref() };
|
||||||
let block_ctx = Context::decode(block.ctx);
|
let block_ctx = Context::decode(block.ctx);
|
||||||
|
|
||||||
// Discover the best block that is compatible if we move registers
|
// Discover the best block that is compatible if we load/spill registers
|
||||||
match ctx.diff_with_same_regs(&block_ctx) {
|
match ctx.diff_allowing_reg_mismatch(&block_ctx) {
|
||||||
TypeDiff::Compatible(diff) if diff < best_diff => {
|
TypeDiff::Compatible(diff) if diff < best_diff => {
|
||||||
best_ctx = Some(block_ctx);
|
best_mapping = Some(block_ctx.get_reg_mapping());
|
||||||
best_diff = diff;
|
best_diff = diff;
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
best_ctx
|
best_mapping
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
|
/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
|
||||||
@ -2596,6 +2579,14 @@ impl Context {
|
|||||||
self.sp_opnd(-ep_offset + offset)
|
self.sp_opnd(-ep_offset + offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Start using a register for a given stack temp or a local.
|
||||||
|
pub fn alloc_reg(&mut self, opnd: RegOpnd) {
|
||||||
|
let mut reg_mapping = self.get_reg_mapping();
|
||||||
|
if reg_mapping.alloc_reg(opnd) {
|
||||||
|
self.set_reg_mapping(reg_mapping);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Stop using a register for a given stack temp or a local.
|
/// Stop using a register for a given stack temp or a local.
|
||||||
/// This allows us to reuse the register for a value that we know is dead
|
/// This allows us to reuse the register for a value that we know is dead
|
||||||
/// and will no longer be used (e.g. popped stack temp).
|
/// and will no longer be used (e.g. popped stack temp).
|
||||||
@ -2898,19 +2889,26 @@ impl Context {
|
|||||||
return TypeDiff::Compatible(diff);
|
return TypeDiff::Compatible(diff);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Basically diff() but allows RegMapping incompatibility that can be fixed
|
/// Basically diff() but allows RegMapping incompatibility that could be fixed by
|
||||||
/// by register moves.
|
/// spilling, loading, or shuffling registers.
|
||||||
pub fn diff_with_same_regs(&self, dst: &Context) -> TypeDiff {
|
pub fn diff_allowing_reg_mismatch(&self, dst: &Context) -> TypeDiff {
|
||||||
|
// We shuffle only RegOpnd::Local and spill any other RegOpnd::Stack.
|
||||||
|
// If dst has RegOpnd::Stack, we can't reuse the block as a callee.
|
||||||
|
for reg_opnd in dst.get_reg_mapping().get_reg_opnds() {
|
||||||
|
if matches!(reg_opnd, RegOpnd::Stack(_)) {
|
||||||
|
return TypeDiff::Incompatible;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Prepare a Context with the same registers
|
// Prepare a Context with the same registers
|
||||||
let mut dst_with_same_regs = dst.clone();
|
let mut dst_with_same_regs = dst.clone();
|
||||||
dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());
|
dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());
|
||||||
|
|
||||||
// Diff registers and other stuff separately, and merge them
|
// Diff registers and other stuff separately, and merge them
|
||||||
match (self.diff(&dst_with_same_regs), self.get_reg_mapping().diff(dst.get_reg_mapping())) {
|
if let TypeDiff::Compatible(ctx_diff) = self.diff(&dst_with_same_regs) {
|
||||||
(TypeDiff::Compatible(ctx_diff), TypeDiff::Compatible(reg_diff)) => {
|
TypeDiff::Compatible(ctx_diff + self.get_reg_mapping().diff(dst.get_reg_mapping()))
|
||||||
TypeDiff::Compatible(ctx_diff + reg_diff)
|
} else {
|
||||||
}
|
TypeDiff::Incompatible
|
||||||
_ => TypeDiff::Incompatible
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user