YJIT: Pass method arguments using registers (#11280)
* YJIT: Pass method arguments using registers
* s/at_current_insn/at_compile_target/
* Implement register shuffle
parent f2ac013009
commit 5b129c899a

Notes (git), 2024-08-28 00:05:03 +00:00:
Merged-By: k0kubun <takashikkbn@gmail.com>
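A minimal standalone sketch of the register shuffle implemented here: reorder register-to-register moves so that no move clobbers a source a later move still reads, and break cycles by parking one value in a scratch register. The Reg type, SCRATCH constant, and reorder_moves function below are illustrative placeholders rather than YJIT identifiers; the actual implementation is Assembler::reorder_reg_moves in the diff that follows.

// Sketch only: mirrors the cycle-breaking strategy of reorder_reg_moves,
// using made-up register names instead of YJIT's Reg/Opnd types.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Reg(u8);

// Hypothetical scratch register, standing in for Assembler::SCRATCH_REG.
const SCRATCH: Reg = Reg(255);

// Each element is (destination, source). Returns the moves in an order that
// is safe to emit sequentially.
fn reorder_moves(mut pending: Vec<(Reg, Reg)>) -> Vec<(Reg, Reg)> {
    // Drop no-op moves whose destination already equals the source.
    pending.retain(|&(dst, src)| dst != src);

    let mut ordered = Vec::new();
    while !pending.is_empty() {
        // A move is "safe" when its destination is not the source of any
        // remaining move, so emitting it cannot clobber a value still needed.
        while let Some(i) = pending
            .iter()
            .position(|&(dst, _)| pending.iter().all(|&(_, src)| src != dst))
        {
            ordered.push(pending.remove(i));
        }

        // No safe move left: the remaining moves form a cycle. Break it by
        // parking one source in the scratch register and finishing that move later.
        if let Some((dst, src)) = pending.first().copied() {
            assert!(pending.iter().all(|&(_, s)| s != SCRATCH));
            ordered.push((SCRATCH, src));
            pending.remove(0);
            pending.push((dst, SCRATCH));
        }
    }
    ordered
}

fn main() {
    // Swapping r0 and r1 is a cycle, so the scratch register is used:
    // SCRATCH <- r1, r1 <- r0, r0 <- SCRATCH
    let moves = vec![(Reg(0), Reg(1)), (Reg(1), Reg(0))];
    println!("{:?}", reorder_moves(moves));
}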
@@ -2256,6 +2256,34 @@ assert_equal '7', %q{
foo(5,2)
}

# regression test for argument registers with invalidation
assert_equal '[0, 1, 2]', %q{
def test(n)
ret = n
binding
ret
end

[0, 1, 2].map do |n|
test(n)
end
}

# regression test for argument registers
assert_equal 'true', %q{
class Foo
def ==(other)
other == nil
end
end

def test
[Foo.new].include?(Foo.new)
end

test
}

# test pattern matching
assert_equal '[:ok, :ok]', %q{
class C
@@ -1032,7 +1032,7 @@ pub struct Assembler {
pub ctx: Context,

/// The current ISEQ's local table size. asm.local_opnd() uses this, and it's
/// sometimes hard to pass this value, e.g. asm.spill_temps() in asm.ccall().
/// sometimes hard to pass this value, e.g. asm.spill_regs() in asm.ccall().
///
/// `None` means we're not assembling for an ISEQ, or that the local size is
/// not relevant.
@@ -1241,8 +1241,36 @@ impl Assembler
self.ctx.clear_local_types();
}

/// Repurpose stack temp registers to the corresponding locals for arguments
pub fn map_temp_regs_to_args(&mut self, callee_ctx: &mut Context, argc: i32) -> Vec<RegOpnd> {
let mut callee_reg_mapping = callee_ctx.get_reg_mapping();
let mut mapped_temps = vec![];

for arg_idx in 0..argc {
let stack_idx: u8 = (self.ctx.get_stack_size() as i32 - argc + arg_idx).try_into().unwrap();
let temp_opnd = RegOpnd::Stack(stack_idx);

// For each argument, if the stack temp for it has a register,
// let the callee use the register for the local variable.
if let Some(reg_idx) = self.ctx.get_reg_mapping().get_reg(temp_opnd) {
let local_opnd = RegOpnd::Local(arg_idx.try_into().unwrap());
callee_reg_mapping.set_reg(local_opnd, reg_idx);
mapped_temps.push(temp_opnd);
}
}

asm_comment!(self, "local maps: {:?}", callee_reg_mapping);
callee_ctx.set_reg_mapping(callee_reg_mapping);
mapped_temps
}

/// Spill all live registers to the stack
pub fn spill_regs(&mut self) {
self.spill_regs_except(&vec![]);
}

/// Spill all live registers except `ignored_temps` to the stack
pub fn spill_regs_except(&mut self, ignored_temps: &Vec<RegOpnd>) {
// Forget registers above the stack top
let mut reg_mapping = self.ctx.get_reg_mapping();
for stack_idx in self.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 {
@@ -1250,36 +1278,46 @@ impl Assembler
}
self.set_reg_mapping(reg_mapping);

// Spill live stack temps
if self.ctx.get_reg_mapping() != RegMapping::default() {
asm_comment!(self, "spill_temps: {:?} -> {:?}", self.ctx.get_reg_mapping(), RegMapping::default());

// Spill stack temps
for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
if reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx)) {
let idx = self.ctx.get_stack_size() - 1 - stack_idx;
self.spill_temp(self.stack_opnd(idx.into()));
}
}

// Spill locals
for local_idx in 0..MAX_CTX_TEMPS as u8 {
if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
let ep_offset = first_local_ep_offset - local_idx as u32;
self.spill_temp(self.local_opnd(ep_offset));
}
}

self.ctx.set_reg_mapping(reg_mapping);
// If no registers are in use, skip all checks
if self.ctx.get_reg_mapping() == RegMapping::default() {
return;
}

// Every stack temp should have been spilled
assert_eq!(self.ctx.get_reg_mapping(), RegMapping::default());
// Collect stack temps to be spilled
let mut spilled_opnds = vec![];
for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
let reg_opnd = RegOpnd::Stack(stack_idx);
if !ignored_temps.contains(&reg_opnd) && reg_mapping.dealloc_reg(reg_opnd) {
let idx = self.ctx.get_stack_size() - 1 - stack_idx;
let spilled_opnd = self.stack_opnd(idx.into());
spilled_opnds.push(spilled_opnd);
reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
}

// Collect locals to be spilled
for local_idx in 0..MAX_CTX_TEMPS as u8 {
if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
let ep_offset = first_local_ep_offset - local_idx as u32;
let spilled_opnd = self.local_opnd(ep_offset);
spilled_opnds.push(spilled_opnd);
reg_mapping.dealloc_reg(spilled_opnd.reg_opnd());
}
}

// Spill stack temps and locals
if !spilled_opnds.is_empty() {
asm_comment!(self, "spill_regs: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping);
for &spilled_opnd in spilled_opnds.iter() {
self.spill_reg(spilled_opnd);
}
self.ctx.set_reg_mapping(reg_mapping);
}
}

/// Spill a stack temp from a register to the stack
fn spill_temp(&mut self, opnd: Opnd) {
fn spill_reg(&mut self, opnd: Opnd) {
assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);

// Use different RegMappings for dest and src operands
@@ -1308,6 +1346,42 @@ impl Assembler
}
}

// Shuffle register moves, sometimes adding extra moves using SCRATCH_REG,
// so that they will not rewrite each other before they are used.
pub fn reorder_reg_moves(old_moves: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
// Return the index of a move whose destination is not used as a source if any.
fn find_safe_move(moves: &Vec<(Reg, Opnd)>) -> Option<usize> {
moves.iter().enumerate().find(|(_, &(dest_reg, _))| {
moves.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
}).map(|(index, _)| index)
}

// Remove moves whose source and destination are the same
let mut old_moves: Vec<(Reg, Opnd)> = old_moves.clone().into_iter()
.filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();

let mut new_moves = vec![];
while old_moves.len() > 0 {
// Keep taking safe moves
while let Some(index) = find_safe_move(&old_moves) {
new_moves.push(old_moves.remove(index));
}

// No safe move. Load the source of one move into SCRATCH_REG, and
// then load SCRATCH_REG into the destination when it's safe.
if old_moves.len() > 0 {
// Make sure it's safe to use SCRATCH_REG
assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));

// Move SCRATCH <- opnd, and delay reg <- SCRATCH
let (reg, opnd) = old_moves.remove(0);
new_moves.push((Assembler::SCRATCH_REG, opnd));
old_moves.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
}
}
new_moves
}

/// Sets the out field on the various instructions that require allocated
/// registers because their output is used as the operand on a subsequent
/// instruction. This is our implementation of the linear scan algorithm.
@@ -1353,42 +1427,6 @@ impl Assembler
}
}

// Reorder C argument moves, sometimes adding extra moves using SCRATCH_REG,
// so that they will not rewrite each other before they are used.
fn reorder_c_args(c_args: &Vec<(Reg, Opnd)>) -> Vec<(Reg, Opnd)> {
// Return the index of a move whose destination is not used as a source if any.
fn find_safe_arg(c_args: &Vec<(Reg, Opnd)>) -> Option<usize> {
c_args.iter().enumerate().find(|(_, &(dest_reg, _))| {
c_args.iter().all(|&(_, src_opnd)| src_opnd != Opnd::Reg(dest_reg))
}).map(|(index, _)| index)
}

// Remove moves whose source and destination are the same
let mut c_args: Vec<(Reg, Opnd)> = c_args.clone().into_iter()
.filter(|&(reg, opnd)| Opnd::Reg(reg) != opnd).collect();

let mut moves = vec![];
while c_args.len() > 0 {
// Keep taking safe moves
while let Some(index) = find_safe_arg(&c_args) {
moves.push(c_args.remove(index));
}

// No safe move. Load the source of one move into SCRATCH_REG, and
// then load SCRATCH_REG into the destination when it's safe.
if c_args.len() > 0 {
// Make sure it's safe to use SCRATCH_REG
assert!(c_args.iter().all(|&(_, opnd)| opnd != Opnd::Reg(Assembler::SCRATCH_REG)));

// Move SCRATCH <- opnd, and delay reg <- SCRATCH
let (reg, opnd) = c_args.remove(0);
moves.push((Assembler::SCRATCH_REG, opnd));
c_args.push((reg, Opnd::Reg(Assembler::SCRATCH_REG)));
}
}
moves
}

// Adjust the number of entries in live_ranges so that it can be indexed by mapped indexes.
fn shift_live_ranges(live_ranges: &mut Vec<usize>, start_index: usize, shift_offset: isize) {
if shift_offset >= 0 {
@@ -1564,7 +1602,7 @@ impl Assembler
if c_args.len() > 0 {
// Resolve C argument dependencies
let c_args_len = c_args.len() as isize;
let moves = reorder_c_args(&c_args.drain(..).into_iter().collect());
let moves = Self::reorder_reg_moves(&c_args.drain(..).into_iter().collect());
shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len);

// Push batched C arguments
@@ -1808,7 +1846,7 @@ impl Assembler {
// Mark all temps as not being in registers.
// Temps will be marked back as being in registers by cpop_all.
// We assume that cpush_all + cpop_all are used for C functions in utils.rs
// that don't require spill_temps for GC.
// that don't require spill_regs for GC.
self.set_reg_mapping(RegMapping::default());
}
@@ -3,6 +3,7 @@

use crate::asm::*;
use crate::backend::ir::*;
use crate::backend::current::TEMP_REGS;
use crate::core::*;
use crate::cruby::*;
use crate::invariants::*;
@@ -114,10 +115,13 @@ pub struct JITState<'a> {

/// Stack of symbol names for --yjit-perf
perf_stack: Vec<String>,

/// When true, this block is the first block compiled by gen_block_series().
first_block: bool,
}

impl<'a> JITState<'a> {
pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb) -> Self {
pub fn new(blockid: BlockId, starting_ctx: Context, output_ptr: CodePtr, ec: EcPtr, ocb: &'a mut OutlinedCb, first_block: bool) -> Self {
JITState {
iseq: blockid.iseq,
starting_insn_idx: blockid.idx,
@@ -140,6 +144,7 @@ impl<'a> JITState<'a> {
block_assumes_single_ractor: false,
perf_map: Rc::default(),
perf_stack: vec![],
first_block,
}
}

@@ -212,9 +217,16 @@ impl<'a> JITState<'a> {
self.next_insn_idx() + insn_len(next_opcode) as u16
}

// Check if we are compiling the instruction at the stub PC
// Check if we are compiling the instruction at the stub PC with the target Context
// Meaning we are compiling the instruction that is next to execute
pub fn at_current_insn(&self) -> bool {
pub fn at_compile_target(&self) -> bool {
// If this is not the first block compiled by gen_block_series(),
// it might be compiling the same block again with a different Context.
// In that case, it should defer_compilation() and inspect the stack there.
if !self.first_block {
return false;
}

let ec_pc: *mut VALUE = unsafe { get_cfp_pc(self.get_cfp()) };
ec_pc == self.pc
}
@@ -222,7 +234,7 @@ impl<'a> JITState<'a> {
// Peek at the nth topmost value on the Ruby stack.
// Returns the topmost value when n == 0.
pub fn peek_at_stack(&self, ctx: &Context, n: isize) -> VALUE {
assert!(self.at_current_insn());
assert!(self.at_compile_target());
assert!(n < ctx.get_stack_size() as isize);

// Note: this does not account for ctx->sp_offset because
@@ -241,7 +253,7 @@ impl<'a> JITState<'a> {
}

fn peek_at_local(&self, n: i32) -> VALUE {
assert!(self.at_current_insn());
assert!(self.at_compile_target());

let local_table_size: isize = unsafe { get_iseq_body_local_table_size(self.iseq) }
.try_into()
@@ -257,7 +269,7 @@ impl<'a> JITState<'a> {
}

fn peek_at_block_handler(&self, level: u32) -> VALUE {
assert!(self.at_current_insn());
assert!(self.at_compile_target());

unsafe {
let ep = get_cfp_ep_level(self.get_cfp(), level);
@@ -656,7 +668,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
}

// Only able to check types when at current insn
assert!(jit.at_current_insn());
assert!(jit.at_compile_target());

let self_val = jit.peek_at_self();
let self_val_type = Type::from(self_val);
@@ -1172,6 +1184,7 @@ pub fn gen_single_block(
ec: EcPtr,
cb: &mut CodeBlock,
ocb: &mut OutlinedCb,
first_block: bool,
) -> Result<BlockRef, ()> {
// Limit the number of specialized versions for this block
let ctx = limit_block_versions(blockid, start_ctx);
@@ -1195,7 +1208,7 @@ pub fn gen_single_block(
let mut insn_idx: IseqIdx = blockid.idx;

// Initialize a JIT state object
let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb);
let mut jit = JITState::new(blockid, ctx, cb.get_write_ptr(), ec, ocb, first_block);
jit.iseq = blockid.iseq;

// Create a backend assembler instance
@@ -1265,7 +1278,7 @@ pub fn gen_single_block(
}

// In debug mode, verify our existing assumption
if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_current_insn() {
if cfg!(debug_assertions) && get_option!(verify_ctx) && jit.at_compile_target() {
verify_ctx(&jit, &asm.ctx);
}

@@ -1508,7 +1521,7 @@ fn fuse_putobject_opt_ltlt(
if shift_amt > 63 || shift_amt < 0 {
return None;
}
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -1772,7 +1785,7 @@ fn gen_splatkw(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime hash operand
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -2146,7 +2159,7 @@ fn gen_expandarray(
let array_opnd = asm.stack_opnd(0);

// Defer compilation so we can specialize on a runtime `self`
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -2345,7 +2358,16 @@ fn gen_getlocal_generic(
// Load the local from the block
// val = *(vm_get_ep(GET_EP(), level) - idx);
let offs = -(SIZEOF_VALUE_I32 * ep_offset as i32);
Opnd::mem(64, ep_opnd, offs)
let local_opnd = Opnd::mem(64, ep_opnd, offs);

// Write back an argument register to the stack. If the local variable
// is an argument, it might have an allocated register, but if this ISEQ
// is known to escape EP, the register shouldn't be used after this getlocal.
if level == 0 && asm.ctx.get_reg_mapping().get_reg(asm.local_opnd(ep_offset).reg_opnd()).is_some() {
asm.mov(local_opnd, asm.local_opnd(ep_offset));
}

local_opnd
};

// Write the local at SP
@@ -2425,6 +2447,13 @@ fn gen_setlocal_generic(
asm.alloc_reg(local_opnd.reg_opnd());
(flags_opnd, local_opnd)
} else {
// Make sure getlocal doesn't read a stale register. If the local variable
// is an argument, it might have an allocated register, but if this ISEQ
// is known to escape EP, the register shouldn't be used after this setlocal.
if level == 0 {
asm.ctx.dealloc_reg(asm.local_opnd(ep_offset).reg_opnd());
}

// Load flags and the local for the level
let ep_opnd = gen_get_ep(asm, level);
let flags_opnd = Opnd::mem(
@@ -2627,11 +2656,11 @@ fn gen_checkkeyword(
// The index of the keyword we want to check
let index: i64 = jit.get_arg(1).as_i64();

// Load environment pointer EP
let ep_opnd = gen_get_ep(asm, 0);

// VALUE kw_bits = *(ep - bits);
let bits_opnd = Opnd::mem(64, ep_opnd, SIZEOF_VALUE_I32 * -bits_offset);
// `unspecified_bits` is a part of the local table. Therefore, we may allocate a register for
// that "local" when passing it as an argument. We must use such a register to avoid loading
// random bits from the stack if any. We assume that EP is not escaped as of entering a method
// with keyword arguments.
let bits_opnd = asm.local_opnd(bits_offset as u32);

// unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
// if ((b & (0x01 << idx))) {
@@ -2846,7 +2875,7 @@ fn gen_getinstancevariable(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -2910,7 +2939,7 @@ fn gen_setinstancevariable(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -3221,7 +3250,7 @@ fn gen_definedivar(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize base on a runtime receiver
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -3550,7 +3579,7 @@ fn gen_equality_specialized(
return Some(true);
}

if !jit.at_current_insn() {
if !jit.at_compile_target() {
return None;
}
let comptime_a = jit.peek_at_stack(&asm.ctx, 1);
@@ -3669,7 +3698,7 @@ fn gen_opt_aref(
}

// Defer compilation so we can specialize base on a runtime receiver
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -3770,7 +3799,7 @@ fn gen_opt_aset(
asm: &mut Assembler,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on a runtime `self`
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -4376,7 +4405,7 @@ fn gen_opt_case_dispatch(
// We'd hope that our jitted code will be sufficiently fast without the
// hash lookup, at least for small hashes, but it's worth revisiting this
// assumption in the future.
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -6433,14 +6462,6 @@ fn gen_push_frame(
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_SELF), frame.recv);
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_BLOCK_CODE), 0.into());

if frame.iseq.is_some() {
// Spill stack temps to let the callee use them (must be done before changing the SP register)
asm.spill_regs();

// Saving SP before calculating ep avoids a dependency on a register
// However this must be done after referencing frame.recv, which may be SP-relative
asm.mov(SP, sp);
}
let ep = asm.sub(sp, SIZEOF_VALUE.into());
asm.mov(cfp_opnd(RUBY_OFFSET_CFP_EP), ep);
}
@@ -7770,9 +7791,32 @@ fn gen_send_iseq(
pc: None, // We are calling into jitted code, which will set the PC as necessary
}));

// Create a context for the callee
let mut callee_ctx = Context::default();

// Transfer some stack temp registers to the callee's locals for arguments.
let mapped_temps = if !forwarding {
asm.map_temp_regs_to_args(&mut callee_ctx, argc)
} else {
// When forwarding, the callee's local table has only a callinfo,
// so we can't map the actual arguments to the callee's locals.
vec![]
};

// Spill stack temps and locals that are not used by the callee.
// This must be done before changing the SP register.
asm.spill_regs_except(&mapped_temps);

// Saving SP before calculating ep avoids a dependency on a register
// However this must be done after referencing frame.recv, which may be SP-relative
asm.mov(SP, callee_sp);

// Log the name of the method we're calling to. We intentionally don't do this for inlined ISEQs.
// We also do this after gen_push_frame() to minimize the impact of spill_temps() on asm.ccall().
if get_option!(gen_stats) {
// Protect caller-saved registers in case they're used for arguments
asm.cpush_all();

// Assemble the ISEQ name string
let name_str = get_iseq_name(iseq);

@@ -7781,6 +7825,7 @@ fn gen_send_iseq(

// Increment the counter for this cfunc
asm.ccall(incr_iseq_counter as *const u8, vec![iseq_idx.into()]);
asm.cpop_all();
}

// No need to set cfp->pc since the callee sets it whenever calling into routines
@@ -7794,9 +7839,6 @@ fn gen_send_iseq(
idx: jit.next_insn_idx(),
};

// Create a context for the callee
let mut callee_ctx = Context::default();

// If the callee has :inline_block annotation and the callsite has a block ISEQ,
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
@@ -7816,6 +7858,7 @@ fn gen_send_iseq(
callee_ctx.set_local_type(0, Type::Unknown)
}

// Set the receiver type in the callee's context
let recv_type = if captured_self {
Type::Unknown // we don't track the type information of captured->self for now
} else {
@@ -7823,6 +7866,29 @@ fn gen_send_iseq(
};
callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);

// Now that callee_ctx is prepared, discover a block that can be reused if we move some registers.
// If there's such a block, move registers accordingly to avoid creating a new block.
let blockid = BlockId { iseq, idx: start_pc_offset };
if !mapped_temps.is_empty() {
// Discover a block that have the same things in different (or same) registers
if let Some(block_ctx) = find_block_ctx_with_same_regs(blockid, &callee_ctx) {
// List pairs of moves for making the register mappings compatible
let mut moves = vec![];
for &reg_opnd in callee_ctx.get_reg_mapping().get_reg_opnds().iter() {
let old_reg = TEMP_REGS[callee_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
let new_reg = TEMP_REGS[block_ctx.get_reg_mapping().get_reg(reg_opnd).unwrap()];
moves.push((new_reg, Opnd::Reg(old_reg)));
}

// Shuffle them to break cycles and generate the moves
let moves = Assembler::reorder_reg_moves(&moves);
for (reg, opnd) in moves {
asm.load_into(Opnd::Reg(reg), opnd);
}
callee_ctx.set_reg_mapping(block_ctx.get_reg_mapping());
}
}

// The callee might change locals through Kernel#binding and other means.
asm.clear_local_types();

@@ -7856,10 +7922,7 @@ fn gen_send_iseq(
gen_direct_jump(
jit,
&callee_ctx,
BlockId {
iseq: iseq,
idx: start_pc_offset,
},
blockid,
asm,
);

@@ -8541,7 +8604,7 @@ fn gen_send_general(
let mut flags = unsafe { vm_ci_flag(ci) };

// Defer compilation so we can specialize on class of receiver
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -9102,7 +9165,7 @@ fn gen_invokeblock_specialized(
asm: &mut Assembler,
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -9265,7 +9328,7 @@ fn gen_invokesuper_specialized(
cd: *const rb_call_data,
) -> Option<CodegenStatus> {
// Defer compilation so we can specialize on class of receiver
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -9499,7 +9562,7 @@ fn gen_objtostring(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -9842,7 +9905,7 @@ fn gen_getblockparamproxy(
jit: &mut JITState,
asm: &mut Assembler,
) -> Option<CodegenStatus> {
if !jit.at_current_insn() {
if !jit.at_compile_target() {
defer_compilation(jit, asm);
return Some(EndBlock);
}
@@ -10593,6 +10656,7 @@ mod tests {
cb.get_write_ptr(),
ptr::null(), // No execution context in tests. No peeking!
ocb,
true,
)
}
yjit/src/core.rs (106 lines changed)
@@ -371,6 +371,12 @@ impl RegMapping {
.map(|(reg_idx, _)| reg_idx)
}

/// Set a given operand to the register at a given index.
pub fn set_reg(&mut self, opnd: RegOpnd, reg_idx: usize) {
assert!(self.0[reg_idx].is_none());
self.0[reg_idx] = Some(opnd);
}

/// Allocate a register for a given operand if available.
/// Return true if self is updated.
pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool {
@@ -435,6 +441,32 @@ impl RegMapping {
RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()),
}.map(|(index, _)| index)
}

/// Return a vector of RegOpnds that have an allocated register
pub fn get_reg_opnds(&self) -> Vec<RegOpnd> {
self.0.iter().filter_map(|&reg_opnd| reg_opnd).collect()
}

/// Return TypeDiff::Compatible(diff) if dst has a mapping that can be made by moving registers
/// in self `diff` times. TypeDiff::Incompatible if they have different things in registers.
pub fn diff(&self, dst: RegMapping) -> TypeDiff {
let src_opnds = self.get_reg_opnds();
let dst_opnds = dst.get_reg_opnds();
if src_opnds.len() != dst_opnds.len() {
return TypeDiff::Incompatible;
}

let mut diff = 0;
for &reg_opnd in src_opnds.iter() {
match (self.get_reg(reg_opnd), dst.get_reg(reg_opnd)) {
(Some(src_idx), Some(dst_idx)) => if src_idx != dst_idx {
diff += 1;
}
_ => return TypeDiff::Incompatible,
}
}
TypeDiff::Compatible(diff)
}
}

impl fmt::Debug for RegMapping {
@@ -2080,9 +2112,8 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
}
}

/// Count the number of block versions matching a given blockid
/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
/// Count the number of block versions that match a given BlockId and part of a Context
fn get_num_versions(blockid: BlockId, ctx: &Context) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {

@@ -2094,9 +2125,14 @@ fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
.version_map
.get(insn_idx)
.map(|versions| {
versions.iter().filter(|&&version|
Context::decode(unsafe { version.as_ref() }.ctx).inline() == inlined
).count()
versions.iter().filter(|&&version| {
let version_ctx = Context::decode(unsafe { version.as_ref() }.ctx);
// Inline versions are counted separately towards MAX_INLINE_VERSIONS.
version_ctx.inline() == ctx.inline() &&
// find_block_versions() finds only blocks with compatible reg_mapping,
// so count only versions with compatible reg_mapping.
version_ctx.reg_mapping == ctx.reg_mapping
}).count()
})
.unwrap_or(0)
}
@@ -2128,10 +2164,7 @@ pub fn get_or_create_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
/// Retrieve a basic block version for an (iseq, idx) tuple
/// This will return None if no version is found
fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
let versions = match get_version_list(blockid) {
Some(versions) => versions,
None => return None,
};
let versions = get_version_list(blockid)?;

// Best match found
let mut best_version: Option<BlockRef> = None;
@@ -2156,6 +2189,33 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
return best_version;
}

/// Basically find_block_version() but allows RegMapping incompatibility
/// that can be fixed by register moves and returns Context
pub fn find_block_ctx_with_same_regs(blockid: BlockId, ctx: &Context) -> Option<Context> {
let versions = get_version_list(blockid)?;

// Best match found
let mut best_ctx: Option<Context> = None;
let mut best_diff = usize::MAX;

// For each version matching the blockid
for blockref in versions.iter() {
let block = unsafe { blockref.as_ref() };
let block_ctx = Context::decode(block.ctx);

// Discover the best block that is compatible if we move registers
match ctx.diff_with_same_regs(&block_ctx) {
TypeDiff::Compatible(diff) if diff < best_diff => {
best_ctx = Some(block_ctx);
best_diff = diff;
}
_ => {}
}
}

best_ctx
}

/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
const MAX_INLINE_VERSIONS: usize = 1000;

@@ -2166,7 +2226,7 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
return *ctx;
}

let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
let next_versions = get_num_versions(blockid, ctx) + 1;
let max_versions = if ctx.inline() {
MAX_INLINE_VERSIONS
} else {
@@ -2782,8 +2842,24 @@ impl Context {
return TypeDiff::Compatible(diff);
}

/// Basically diff() but allows RegMapping incompatibility that can be fixed
/// by register moves.
pub fn diff_with_same_regs(&self, dst: &Context) -> TypeDiff {
// Prepare a Context with the same registers
let mut dst_with_same_regs = dst.clone();
dst_with_same_regs.set_reg_mapping(self.get_reg_mapping());

// Diff registers and other stuff separately, and merge them
match (self.diff(&dst_with_same_regs), self.get_reg_mapping().diff(dst.get_reg_mapping())) {
(TypeDiff::Compatible(ctx_diff), TypeDiff::Compatible(reg_diff)) => {
TypeDiff::Compatible(ctx_diff + reg_diff)
}
_ => TypeDiff::Incompatible
}
}

pub fn two_fixnums_on_stack(&self, jit: &mut JITState) -> Option<bool> {
if jit.at_current_insn() {
if jit.at_compile_target() {
let comptime_recv = jit.peek_at_stack(self, 1);
let comptime_arg = jit.peek_at_stack(self, 0);
return Some(comptime_recv.fixnum_p() && comptime_arg.fixnum_p());
@@ -2955,7 +3031,7 @@ fn gen_block_series_body(
let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE);

// Generate code for the first block
let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?;
let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb, true).ok()?;
batch.push(first_block); // Keep track of this block version

// Add the block version to the VersionMap for this ISEQ
@@ -2996,7 +3072,7 @@ fn gen_block_series_body(

// Generate new block using context from the last branch.
let requested_ctx = Context::decode(requested_ctx);
let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb);
let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb, false);

// If the block failed to compile
if result.is_err() {
@@ -4312,7 +4388,7 @@ mod tests {
let cb = CodeBlock::new_dummy(1024);
let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(1024));
let dumm_addr = cb.get_write_ptr();
let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb)
let block = JITState::new(blockid, Context::default(), dumm_addr, ptr::null(), &mut ocb, true)
.into_block(0, dumm_addr, dumm_addr, vec![]);
let _dropper = BlockDropper(block);