YJIT: Spill/load argument registers to reuse blocks (#12287)

* YJIT: Spill/load argument registers to reuse blocks

* Mention the immediate function name

* Explain the context behind spill/load operations
Author: Takashi Kokubun, 2024-12-09 10:02:40 -08:00 (committed by GitHub)
Commit: cff031253f (parent: 93f8de777f)
Notes (git, 2024-12-09 18:02:58 +00:00): Merged-By: k0kubun <takashikkbn@gmail.com>
3 changed files with 122 additions and 92 deletions

File: yjit/src/backend/ir.rs

```diff
@@ -1317,7 +1317,7 @@ impl Assembler
     }

     /// Spill a stack temp from a register to the stack
-    fn spill_reg(&mut self, opnd: Opnd) {
+    pub fn spill_reg(&mut self, opnd: Opnd) {
         assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);

         // Use different RegMappings for dest and src operands
```

File: yjit/src/codegen.rs

```diff
@@ -8136,53 +8136,16 @@ fn gen_send_iseq(
         pc: None, // We are calling into jitted code, which will set the PC as necessary
     }));

-    // Create a context for the callee
-    let mut callee_ctx = Context::default();
-
-    // Transfer some stack temp registers to the callee's locals for arguments.
-    let mapped_temps = if !forwarding {
-        asm.map_temp_regs_to_args(&mut callee_ctx, argc)
-    } else {
-        // When forwarding, the callee's local table has only a callinfo,
-        // so we can't map the actual arguments to the callee's locals.
-        vec![]
-    };
-
-    // Spill stack temps and locals that are not used by the callee.
-    // This must be done before changing the SP register.
-    asm.spill_regs_except(&mapped_temps);
-
-    // Saving SP before calculating ep avoids a dependency on a register
-    // However this must be done after referencing frame.recv, which may be SP-relative
-    asm.mov(SP, callee_sp);
-
-    // Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
-    // We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall().
-    if get_option!(gen_stats) {
-        // Protect caller-saved registers in case they're used for arguments
-        asm.cpush_all();
-
-        // Assemble the ISEQ name string
-        let name_str = get_iseq_name(iseq);
-
-        // Get an index for this ISEQ name
-        let iseq_idx = get_iseq_idx(&name_str);
-
-        // Increment the counter for this cfunc
-        asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
-        asm.cpop_all();
-    }
-
     // No need to set cfp->pc since the callee sets it whenever calling into routines
     // that could look at it through jit_save_pc().
     // mov(cb, REG0, const_ptr_opnd(start_pc));
     // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);

-    // Stub so we can return to JITted code
-    let return_block = BlockId {
-        iseq: jit.iseq,
-        idx: jit.next_insn_idx(),
-    };
+    // Create a blockid for the callee
+    let callee_blockid = BlockId { iseq, idx: start_pc_offset };
+
+    // Create a context for the callee
+    let mut callee_ctx = Context::default();

     // If the callee has :inline_block annotation and the callsite has a block ISEQ,
     // duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
```
```diff
@@ -8211,29 +8174,92 @@ fn gen_send_iseq(
     };
     callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);

-    // Now that callee_ctx is prepared, discover a block that can be reused if we move some registers.
-    // If there's such a block, move registers accordingly to avoid creating a new block.
-    let blockid = BlockId { iseq, idx: start_pc_offset };
-    if !mapped_temps.is_empty() {
-        // Discover a block that have the same things in different (or same) registers
-        if let Some(block_ctx) = find_block_ctx_with_same_regs(blockid, &callee_ctx) {
-            // List pairs of moves for making the register mappings compatible
+    // Spill or preserve argument registers
+    if forwarding {
+        // When forwarding, the callee's local table has only a callinfo,
+        // so we can't map the actual arguments to the callee's locals.
+        asm.spill_regs();
+    } else {
+        // Discover stack temp registers that can be used as the callee's locals
+        let mapped_temps = asm.map_temp_regs_to_args(&mut callee_ctx, argc);
+
+        // Spill stack temps and locals that are not used by the callee.
+        // This must be done before changing the SP register.
+        asm.spill_regs_except(&mapped_temps);
+
+        // If the callee block has been compiled before, spill/move registers to reuse the existing block
+        // for minimizing the number of blocks we need to compile.
+        if let Some(existing_reg_mapping) = find_most_compatible_reg_mapping(callee_blockid, &callee_ctx) {
+            asm_comment!(asm, "reuse maps: {:?} -> {:?}", callee_ctx.get_reg_mapping(), existing_reg_mapping);
+
+            // Spill the registers that are not used in the existing block.
+            // When the same ISEQ is compiled as an entry block, it starts with no registers allocated.
+            for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
+                if existing_reg_mapping.get_reg(reg_opnd).is_none() {
+                    match reg_opnd {
+                        RegOpnd::Local(local_idx) => {
+                            let spilled_temp = asm.stack_opnd(argc - local_idx as i32 - 1);
+                            asm.spill_reg(spilled_temp);
+                            callee_ctx.dealloc_reg(reg_opnd);
+                        }
+                        RegOpnd::Stack(_) => unreachable!("callee {:?} should have been spilled", reg_opnd),
+                    }
+                }
+            }
+            assert!(callee_ctx.get_reg_mapping().get_reg_opnds().len() <= existing_reg_mapping.get_reg_opnds().len());
+
+            // Load the registers that are spilled in this block but used in the existing block.
+            // When there are multiple callsites, some registers spilled in this block may be used at other callsites.
+            for &reg_opnd in existing_reg_mapping.get_reg_opnds().iter() {
+                if callee_ctx.get_reg_mapping().get_reg(reg_opnd).is_none() {
+                    match reg_opnd {
+                        RegOpnd::Local(local_idx) => {
+                            callee_ctx.alloc_reg(reg_opnd);
+                            let loaded_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
+                            let loaded_temp = asm.stack_opnd(argc - local_idx as i32 - 1);
+                            asm.load_into(Opnd::Reg(loaded_reg), loaded_temp);
+                        }
+                        RegOpnd::Stack(_) => unreachable!("find_most_compatible_reg_mapping should not leave {:?}", reg_opnd),
+                    }
+                }
+            }
+            assert_eq!(callee_ctx.get_reg_mapping().get_reg_opnds().len(), existing_reg_mapping.get_reg_opnds().len());
+
+            // Shuffle registers to make the register mappings compatible
             let mut moves = vec![];
             for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
                 let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
-                let new_reg = TEMP_REGS[block_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
+                let new_reg = TEMP_REGS[existing_reg_mapping.get_reg(reg_opnd).unwrap()];
                 moves.push((new_reg, Opnd::Reg(old_reg)));
             }
-
-            // Shuffle them to break cycles and generate the moves
-            let moves = Assembler::reorder_reg_moves(&moves);
-            for (reg, opnd) in moves {
+            for (reg, opnd) in Assembler::reorder_reg_moves(&moves) {
                 asm.load_into(Opnd::Reg(reg), opnd);
             }
-            callee_ctx.set_reg_mapping(block_ctx.get_reg_mapping());
+            callee_ctx.set_reg_mapping(existing_reg_mapping);
         }
     }

+    // Update SP register for the callee. This must be done after referencing frame.recv,
+    // which may be SP-relative.
+    asm.mov(SP, callee_sp);
+
+    // Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
+    // We also do this after spill_regs() to avoid doubly spilling the same thing on asm.ccall().
+    if get_option!(gen_stats) {
+        // Protect caller-saved registers in case they're used for arguments
+        asm.cpush_all();
+
+        // Assemble the ISEQ name string
+        let name_str = get_iseq_name(iseq);
+
+        // Get an index for this ISEQ name
+        let iseq_idx = get_iseq_idx(&name_str);
+
+        // Increment the counter for this cfunc
+        asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
+        asm.cpop_all();
+    }
+
     // The callee might change locals through Kernel#binding and other means.
     asm.clear_local_types();
```
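The shuffle at the end of this hunk leans on `Assembler::reorder_reg_moves`, which (per the deleted comment) orders the moves and breaks cycles so that no move clobbers a register another pending move still reads. The sketch below is a rough standalone analog of that problem, not YJIT's implementation: registers are plain `u8` indices, and `SCRATCH` is an assumed free scratch register.

```rust
/// Order `(dst, src)` register moves so no pending source is clobbered,
/// parking a value in a scratch register to break cycles.
fn order_moves(mut pending: Vec<(u8, u8)>) -> Vec<(u8, u8)> {
    const SCRATCH: u8 = u8::MAX; // assumed-free scratch register
    let mut out = Vec::new();
    while !pending.is_empty() {
        // A move is safe if no *other* pending move still reads its destination.
        if let Some(i) = (0..pending.len()).find(|&i| {
            let dst = pending[i].0;
            !pending.iter().enumerate().any(|(j, &(_, src))| j != i && src == dst)
        }) {
            out.push(pending.remove(i));
        } else {
            // Every destination is still needed as a source: a cycle.
            // Save one source to scratch and retarget its readers.
            let parked = pending[0].1;
            out.push((SCRATCH, parked));
            for mv in pending.iter_mut() {
                if mv.1 == parked {
                    mv.1 = SCRATCH;
                }
            }
        }
    }
    out
}

fn main() {
    // Swapping r0 and r1 is a two-move cycle; the scratch register breaks it.
    let ordered = order_moves(vec![(0, 1), (1, 0)]);
    assert_eq!(ordered, vec![(u8::MAX, 1), (1, 0), (0, u8::MAX)]);
}
```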
```diff
@@ -8246,6 +8272,12 @@ fn gen_send_iseq(
     return_asm.ctx.reset_chain_depth_and_defer();
     return_asm.ctx.set_as_return_landing();

+    // Stub so we can return to JITted code
+    let return_block = BlockId {
+        iseq: jit.iseq,
+        idx: jit.next_insn_idx(),
+    };
+
     // Write the JIT return address on the callee frame
     jit.gen_branch(
         asm,
```
```diff
@@ -8266,7 +8298,7 @@
     gen_direct_jump(
         jit,
         &callee_ctx,
-        blockid,
+        callee_blockid,
         asm,
     );
```

File: yjit/src/core.rs

```diff
@@ -447,25 +447,9 @@ impl RegMapping
         self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
     }

-    /// Return TypeDiff::Compatible(diff) if dst has a mapping that can be made by moving registers
-    /// in self `diff` times. TypeDiff::Incompatible if they have different things in registers.
-    pub fn diff(&self, dst: RegMapping) -> TypeDiff {
-        let src_opnds = self.get_reg_opnds();
-        let dst_opnds = dst.get_reg_opnds();
-        if src_opnds.len() != dst_opnds.len() {
-            return TypeDiff::Incompatible;
-        }
-
-        let mut diff = 0;
-        for &reg_opnd in src_opnds.iter() {
-            match (self.get_reg(reg_opnd), dst.get_reg(reg_opnd)) {
-                (Some(src_idx), Some(dst_idx)) => if src_idx != dst_idx {
-                    diff += 1;
-                }
-                _ => return TypeDiff::Incompatible,
-            }
-        }
-        TypeDiff::Compatible(diff)
+    /// Count the number of registers that store a different operand from `dst`.
+    pub fn diff(&self, dst: RegMapping) -> usize {
+        self.0.iter().enumerate().filter(|&(reg_idx, &reg)| reg != dst.0[reg_idx]).count()
     }
 }
```
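The rewritten `RegMapping::diff` is no longer a compatibility test. Mappings holding different sets of operands used to yield `TypeDiff::Incompatible`; now every mismatched slot simply costs one fix-up (a spill, a load, or a move). A self-contained illustration with stand-in types:

```rust
// Stand-in types for illustration; YJIT's RegMapping stores RegOpnds for a
// fixed number of allocatable registers, which is what the array models.
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)]
enum RegOpnd {
    Stack(u8),
    Local(u8),
}

#[derive(Clone, Copy)]
struct RegMapping([Option<RegOpnd>; 5]);

impl RegMapping {
    // Mirrors the one-liner in the hunk above.
    fn diff(&self, dst: RegMapping) -> usize {
        self.0.iter().enumerate().filter(|&(reg_idx, &reg)| reg != dst.0[reg_idx]).count()
    }
}

fn main() {
    use RegOpnd::Local;
    let src = RegMapping([Some(Local(0)), Some(Local(1)), None, None, None]);
    // local1 sits in a different register, and local2 is newly register-allocated:
    // three slots disagree, so three spill/load/move fix-ups are needed.
    let dst = RegMapping([Some(Local(0)), None, Some(Local(1)), Some(Local(2)), None]);
    assert_eq!(src.diff(dst), 3);
}
```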
```diff
@@ -2240,13 +2224,12 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
     return best_version;
 }

-/// Basically find_block_version() but allows RegMapping incompatibility
-/// that can be fixed by register moves and returns Context
-pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<Context> {
+/// Find the closest RegMapping among ones that have already been compiled.
+pub fn find_most_compatible_reg_mapping(blockid: BlockId, ctx: &Context) -> Option<RegMapping> {
     let versions = get_version_list(blockid)?;

     // Best match found
-    let mut best_ctx: Option<Context> = None;
+    let mut best_mapping: Option<RegMapping> = None;
     let mut best_diff = usize::MAX;

     // For each version matching the blockid
```
```diff
@@ -2254,17 +2237,17 @@ pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<
         let block = unsafe { blockref.as_ref() };
         let block_ctx = Context::decode(block.ctx);

-        // Discover the best block that is compatible if we move registers
-        match ctx.diff_with_same_regs(&block_ctx) {
+        // Discover the best block that is compatible if we load/spill registers
+        match ctx.diff_allowing_reg_mismatch(&block_ctx) {
             TypeDiff::Compatible(diff) if diff < best_diff => {
-                best_ctx = Some(block_ctx);
+                best_mapping = Some(block_ctx.get_reg_mapping());
                 best_diff = diff;
             }
             _ => {}
         }
     }

-    best_ctx
+    best_mapping
 }

 /// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
```
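Stripped of YJIT's types, the search above is a minimum-by-score scan over the block's already-compiled versions. Below is a standalone sketch under simplified assumptions: plain arrays stand in for `RegMapping`, and the `TypeDiff` compatibility filtering that the real loop performs is omitted.

```rust
// A mapping here is just: register index -> which local it holds, if any.
type Mapping = [Option<u8>; 5];

// Same counting rule as RegMapping::diff above.
fn diff(src: &Mapping, dst: &Mapping) -> usize {
    src.iter().zip(dst).filter(|(a, b)| a != b).count()
}

// Pick the already-compiled mapping reachable with the fewest fix-ups.
fn find_most_compatible(versions: &[Mapping], current: &Mapping) -> Option<Mapping> {
    versions.iter().min_by_key(|cand| diff(current, cand)).copied()
}

fn main() {
    let current: Mapping = [Some(0), Some(1), None, None, None];
    let versions = [
        [None, None, None, None, None],          // spill both locals: 2 fix-ups
        [Some(1), Some(0), None, None, None],    // swap two registers: 2 fix-ups
        [Some(0), Some(1), Some(2), None, None], // load one extra local: 1 fix-up
    ];
    assert_eq!(find_most_compatible(&versions, &current), Some(versions[2]));
}
```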
```diff
@@ -2596,6 +2579,14 @@ impl Context
         self.sp_opnd(-ep_offset + offset)
     }

+    /// Start using a register for a given stack temp or a local.
+    pub fn alloc_reg(&mut self, opnd: RegOpnd) {
+        let mut reg_mapping = self.get_reg_mapping();
+        if reg_mapping.alloc_reg(opnd) {
+            self.set_reg_mapping(reg_mapping);
+        }
+    }
+
     /// Stop using a register for a given stack temp or a local.
     /// This allows us to reuse the register for a value that we know is dead
     /// and will no longer be used (e.g. popped stack temp).
```
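The new `Context::alloc_reg` reads the mapping out, mutates the copy, and writes it back only when `RegMapping::alloc_reg` reports success (it appears to return whether a register could be claimed). `Context::decode(block.ctx)` above suggests contexts live in a compact encoded form, which is why the mapping is handled by value rather than in place. A minimal stand-in sketch of that read-copy/write-back pattern (simplified types, not YJIT's):

```rust
#[derive(Clone, Copy, PartialEq, Debug, Default)]
struct RegMapping([Option<u8>; 5]);

impl RegMapping {
    /// Claim a free slot for `local`; report whether one was available.
    fn alloc_reg(&mut self, local: u8) -> bool {
        if let Some(slot) = self.0.iter_mut().find(|slot| slot.is_none()) {
            *slot = Some(local);
            true
        } else {
            false
        }
    }
}

#[derive(Default)]
struct Context {
    reg_mapping: RegMapping, // stand-in; the real Context encodes this compactly
}

impl Context {
    fn get_reg_mapping(&self) -> RegMapping { self.reg_mapping }
    fn set_reg_mapping(&mut self, m: RegMapping) { self.reg_mapping = m; }

    /// Mirrors the hunk above: mutate a copy, write back only on success.
    fn alloc_reg(&mut self, local: u8) {
        let mut reg_mapping = self.get_reg_mapping();
        if reg_mapping.alloc_reg(local) {
            self.set_reg_mapping(reg_mapping);
        }
    }
}

fn main() {
    let mut ctx = Context::default();
    ctx.alloc_reg(0); // local0 claims the first free register slot
    assert_eq!(ctx.get_reg_mapping(), RegMapping([Some(0), None, None, None, None]));
}
```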
```diff
@@ -2898,19 +2889,26 @@ impl Context
         return TypeDiff::Compatible(diff);
     }

-    /// Basically diff() but allows RegMapping incompatibility that can be fixed
-    /// by register moves.
-    pub fn diff_with_same_regs(&self, dst: &Context) -> TypeDiff {
+    /// Basically diff() but allows RegMapping incompatibility that could be fixed by
+    /// spilling, loading, or shuffling registers.
+    pub fn diff_allowing_reg_mismatch(&self, dst: &Context) -> TypeDiff {
+        // We shuffle only RegOpnd::Local and spill any other RegOpnd::Stack.
+        // If dst has RegOpnd::Stack, we can't reuse the block as a callee.
+        for reg_opnd in dst.get_reg_mapping().get_reg_opnds() {
+            if matches!(reg_opnd, RegOpnd::Stack(_)) {
+                return TypeDiff::Incompatible;
+            }
+        }
+
         // Prepare a Context with the same registers
         let mut dst_with_same_regs = dst.clone();
         dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());

         // Diff registers and other stuff separately, and merge them
-        match (self.diff(&dst_with_same_regs), self.get_reg_mapping().diff(dst.get_reg_mapping())) {
-            (TypeDiff::Compatible(ctx_diff), TypeDiff::Compatible(reg_diff)) => {
-                TypeDiff::Compatible(ctx_diff + reg_diff)
-            }
-            _ => TypeDiff::Incompatible
+        if let TypeDiff::Compatible(ctx_diff) = self.diff(&dst_with_same_regs) {
+            TypeDiff::Compatible(ctx_diff + self.get_reg_mapping().diff(dst.get_reg_mapping()))
+        } else {
+            TypeDiff::Incompatible
         }
     }
```
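Two properties of the rewritten check are worth spelling out: a callee that keeps stack temps (rather than just locals) in registers is never reused, and otherwise the score is the plain context diff plus one fix-up per mismatched register slot. A self-contained sketch with stand-in types; the `ctx_diff` parameter stands for what `self.diff(&dst_with_same_regs)` computes in the real method:

```rust
#[derive(Clone, Copy, PartialEq)]
enum RegOpnd {
    Stack(u8),
    Local(u8),
}

#[derive(Debug, PartialEq)]
enum TypeDiff {
    Compatible(usize),
    Incompatible,
}

fn diff_allowing_reg_mismatch(
    dst_regs: &[Option<RegOpnd>],
    src_regs: &[Option<RegOpnd>],
    ctx_diff: TypeDiff, // the non-register part of the context diff
) -> TypeDiff {
    // Reject callees that expect stack temps (not locals) in registers.
    if dst_regs.iter().flatten().any(|r| matches!(r, RegOpnd::Stack(_))) {
        return TypeDiff::Incompatible;
    }
    match ctx_diff {
        TypeDiff::Compatible(d) => {
            // One fix-up per register slot whose contents disagree.
            let reg_diff = src_regs.iter().zip(dst_regs).filter(|(a, b)| a != b).count();
            TypeDiff::Compatible(d + reg_diff)
        }
        TypeDiff::Incompatible => TypeDiff::Incompatible,
    }
}

fn main() {
    let src = [Some(RegOpnd::Local(0)), Some(RegOpnd::Local(1)), None];
    let dst = [Some(RegOpnd::Local(1)), None, None];
    // local0 must be spilled and local1 moved: 2 fix-ups on top of ctx_diff.
    assert_eq!(
        diff_allowing_reg_mismatch(&dst, &src, TypeDiff::Compatible(0)),
        TypeDiff::Compatible(2)
    );
}
```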