Better b.cond usage on AArch64 (#6305)

* Better b.cond usage on AArch64

When we're lowering a conditional jump, we previously had a somewhat
complicated setup: we would emit a conditional jump over an unconditional
jump in the next instruction slot, then write out the destination address
and branch to it through a register.

Now we instead use the b.cond instruction directly whenever the offset
fits (not common, but not unused either). When it doesn't fit, we write
out the inverse condition to jump past the instructions that load the
destination and branch to it directly.
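
For context, b.cond's immediate is a signed 19-bit instruction count,
which is why the offset only sometimes fits. A quick sketch of the
reachable range (consistent with the (1 << 18) bounds in the updated
tests below):

    // imm19 is a signed 19-bit count of 4-byte instructions, so a b.cond
    // can only reach roughly +/-1MiB from the branch itself.
    const IMM19_MIN: i64 = -(1 << 18);     // -262_144 instructions
    const IMM19_MAX: i64 = (1 << 18) - 1;  //  262_143 instructions
    const BYTE_MIN: i64 = IMM19_MIN * 4;   // -1_048_576 bytes
    const BYTE_MAX: i64 = IMM19_MAX * 4;   //  1_048_572 bytes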

* Added an inverse fn for Condition (#443)

Prevents the need to pass two params and potentially reduces errors.

Co-authored-by: Jimmy Miller <jimmyhmiller@jimmys-mbp.lan>

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
Co-authored-by: Jimmy Miller <jimmyhmiller@jimmys-mbp.lan>
Kevin Newton, 2022-08-31 15:44:26 -04:00, committed by GitHub
commit be55b77cc7, parent 32a0591515
Notes: git 2022-09-01 04:44:55 +09:00
Merged-By: maximecb <maximecb@ruby-lang.org>

4 changed files with 96 additions and 71 deletions


@@ -19,4 +19,34 @@ impl Condition {
     pub const GT: u8 = 0b1100; // greater than (signed)
     pub const LE: u8 = 0b1101; // less than or equal to (signed)
     pub const AL: u8 = 0b1110; // always
+
+    pub const fn inverse(condition: u8) -> u8 {
+        match condition {
+            Condition::EQ => Condition::NE,
+            Condition::NE => Condition::EQ,
+            Condition::CS => Condition::CC,
+            Condition::CC => Condition::CS,
+            Condition::MI => Condition::PL,
+            Condition::PL => Condition::MI,
+            Condition::VS => Condition::VC,
+            Condition::VC => Condition::VS,
+            Condition::HI => Condition::LS,
+            Condition::LS => Condition::HI,
+            Condition::LT => Condition::GE,
+            Condition::GE => Condition::LT,
+            Condition::GT => Condition::LE,
+            Condition::LE => Condition::GT,
+            Condition::AL => Condition::AL,
+            _ => panic!("Unknown condition")
+        }
+    }
 }
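
As a quick illustration of the new helper (these asserts just mirror the
match arms above):

    assert_eq!(Condition::inverse(Condition::EQ), Condition::NE);
    assert_eq!(Condition::inverse(Condition::LT), Condition::GE);
    assert_eq!(Condition::inverse(Condition::AL), Condition::AL); // "always" inverts to itself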


@@ -20,8 +20,8 @@ pub struct BranchCond {
 impl BranchCond {
     /// B.cond
     /// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/B-cond--Branch-conditionally-
-    pub fn bcond(cond: u8, byte_offset: i32) -> Self {
-        Self { cond, imm19: byte_offset >> 2 }
+    pub fn bcond(cond: u8, imm19: i32) -> Self {
+        Self { cond, imm19 }
     }
 }

@@ -53,25 +53,25 @@ mod tests {
     #[test]
     fn test_b_eq() {
-        let result: u32 = BranchCond::bcond(Condition::EQ, 128).into();
+        let result: u32 = BranchCond::bcond(Condition::EQ, 32).into();
         assert_eq!(0x54000400, result);
     }

     #[test]
     fn test_b_vs() {
-        let result: u32 = BranchCond::bcond(Condition::VS, 128).into();
+        let result: u32 = BranchCond::bcond(Condition::VS, 32).into();
         assert_eq!(0x54000406, result);
     }

     #[test]
     fn test_b_eq_max() {
-        let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 20) - 4).into();
+        let result: u32 = BranchCond::bcond(Condition::EQ, (1 << 18) - 1).into();
         assert_eq!(0x547fffe0, result);
     }

     #[test]
     fn test_b_eq_min() {
-        let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 20)).into();
+        let result: u32 = BranchCond::bcond(Condition::EQ, -(1 << 18)).into();
         assert_eq!(0x54800000, result);
     }
 }
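
To see why the test operands changed from byte offsets to instruction
counts, it helps to decode one expected value by hand. Per the B.cond
encoding in the ARM manual linked above, bits 31..24 are 0b01010100,
imm19 sits in bits 23..5, and cond in bits 3..0:

    let encoded: u32 = 0x54000400;         // expected b.eq encoding from test_b_eq
    let imm19 = (encoded >> 5) & 0x7FFFF;  // 32 instructions
    let cond = encoded & 0xF;              // 0b0000 == Condition::EQ
    assert_eq!(imm19 * 4, 128);            // the 128-byte offset the old test passed in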


@@ -203,9 +203,10 @@ pub fn b(cb: &mut CodeBlock, imm26: A64Opnd) {
     cb.write_bytes(&bytes);
 }

-/// Whether or not the offset between two instructions fits into the b.cond
-/// instruction. If it doesn't, then we have to load the value into a register
-/// first, then use the b.cond instruction to skip past a direct jump.
+/// Whether or not the offset in number of instructions between two instructions
+/// fits into the b.cond instruction. If it doesn't, then we have to load the
+/// value into a register first, then use the b.cond instruction to skip past a
+/// direct jump.
 pub const fn bcond_offset_fits_bits(offset: i64) -> bool {
     imm_fits_bits(offset, 21) && (offset & 0b11 == 0)
 }
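
A few concrete values for this predicate (my own examples, not from the
patch): the byte offset must fit in a signed 21-bit immediate and have
its low two bits clear:

    assert!(bcond_offset_fits_bits(128));         // small and 4-byte aligned
    assert!(!bcond_offset_fits_bits(130));        // not a multiple of 4
    assert!(!bcond_offset_fits_bits(1 << 21));    // out of range for 21 signed bits
    assert!(bcond_offset_fits_bits(-(1 << 20)));  // most negative offset that fits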
@@ -216,7 +217,7 @@ pub fn bcond(cb: &mut CodeBlock, cond: u8, byte_offset: A64Opnd) {
         A64Opnd::Imm(imm) => {
             assert!(bcond_offset_fits_bits(imm), "The immediate operand must be 21 bits or less and be aligned to a 2-bit boundary.");
-            BranchCond::bcond(cond, imm as i32).into()
+            BranchCond::bcond(cond, (imm / 4) as i32).into()
         },
         _ => panic!("Invalid operand combination to bcond instruction."),
     };


@@ -565,64 +565,42 @@ impl Assembler
     fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
         match target {
             Target::CodePtr(dst_ptr) => {
-                let dst_addr = dst_ptr.into_u64();
-                //let src_addr = cb.get_write_ptr().into_i64() + 4;
-                //let offset = dst_addr - src_addr;
+                let dst_addr = dst_ptr.into_i64();
+                let src_addr = cb.get_write_ptr().into_i64();
+                let offset = dst_addr - src_addr;

-                // If the condition is met, then we'll skip past the
-                // next instruction, put the address in a register, and
-                // jump to it.
-                bcond(cb, CONDITION, A64Opnd::new_imm(8));
+                let num_insns = if bcond_offset_fits_bits(offset) {
+                    // If the jump offset fits into the conditional jump as
+                    // an immediate value and it's properly aligned, then we
+                    // can use the b.cond instruction directly.
+                    bcond(cb, CONDITION, A64Opnd::new_imm(offset));

-                // If we get to this instruction, then the condition
-                // wasn't met, in which case we'll jump past the
-                // next instruction that perform the direct jump.
-                b(cb, A64Opnd::new_imm(2i64 + emit_load_size(dst_addr) as i64));
-                let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
-                br(cb, Assembler::SCRATCH0);
-                for _ in num_insns..4 {
-                    nop(cb);
-                }
+                    // Here we're going to return 1 because we've only
+                    // written out 1 instruction.
+                    1
+                } else {
+                    // Otherwise, we need to load the address into a
+                    // register and use the branch register instruction.
+                    let dst_addr = dst_ptr.into_u64();
+                    let load_insns: i64 = emit_load_size(dst_addr).into();

-                /*
-                // If the jump offset fits into the conditional jump as an
-                // immediate value and it's properly aligned, then we can
-                // use the b.cond instruction directly. Otherwise, we need
-                // to load the address into a register and use the branch
-                // register instruction.
-                if bcond_offset_fits_bits(offset) {
-                    bcond(cb, CONDITION, A64Opnd::new_imm(dst_addr - src_addr));
-                } else {
-                    // If the condition is met, then we'll skip past the
-                    // next instruction, put the address in a register, and
-                    // jump to it.
-                    bcond(cb, CONDITION, A64Opnd::new_imm(8));
+                    // We're going to write out the inverse condition so
+                    // that if it doesn't match it will skip over the
+                    // instructions used for branching.
+                    bcond(cb, Condition::inverse(CONDITION), A64Opnd::new_imm((load_insns + 2) * 4));
+                    emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
+                    br(cb, Assembler::SCRATCH0);

-                    // If the offset fits into a direct jump, then we'll use
-                    // that and the number of instructions will be shorter.
-                    // Otherwise we'll use the branch register instruction.
-                    if b_offset_fits_bits(offset) {
-                        // If we get to this instruction, then the condition
-                        // wasn't met, in which case we'll jump past the
-                        // next instruction that performs the direct jump.
-                        b(cb, A64Opnd::new_imm(1));
-
-                        // Here we'll perform the direct jump to the target.
-                        let offset = dst_addr - cb.get_write_ptr().into_i64() + 4;
-                        b(cb, A64Opnd::new_imm(offset / 4));
-                    } else {
-                        // If we get to this instruction, then the condition
-                        // wasn't met, in which case we'll jump past the
-                        // next instruction that perform the direct jump.
-                        let value = dst_addr as u64;
-                        b(cb, A64Opnd::new_imm(emit_load_size(value).into()));
-                        emit_load_value(cb, Assembler::SCRATCH0, value);
-                        br(cb, Assembler::SCRATCH0);
-                    }
-                }
-                */
+                    // Here we'll return the number of instructions that it
+                    // took to write out the destination address + 1 for the
+                    // b.cond and 1 for the br.
+                    load_insns + 2
+                };
+
+                // We need to make sure we have at least 6 instructions for
+                // every kind of jump for invalidation purposes, so we're
+                // going to write out padding nop instructions here.
+                for _ in num_insns..6 { nop(cb); }
             },
             Target::Label(label_idx) => {
                 // Here we're going to save enough space for ourselves and
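
One detail worth spelling out: the nop padding keeps every
conditional-jump site at a fixed six instructions, which is what makes
patching the site during invalidation safe. A back-of-the-envelope
count, assuming emit_load_size is at most 4 for a 64-bit address (as the
old num_insns..4 loop implied):

    let near_case = 1 + 5;          // b.cond + 5 nops
    let far_worst_case = 1 + 4 + 1; // b.cond + 4-instruction load + br
    assert_eq!(near_case, 6);
    assert_eq!(far_worst_case, 6);  // both shapes occupy 24 bytes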
@@ -904,10 +882,10 @@ impl Assembler
                     _ => unreachable!()
                 };
             },
-            Insn::Je(target) => {
+            Insn::Je(target) | Insn::Jz(target) => {
                 emit_conditional_jump::<{Condition::EQ}>(cb, *target);
             },
-            Insn::Jne(target) => {
+            Insn::Jne(target) | Insn::Jnz(target) => {
                 emit_conditional_jump::<{Condition::NE}>(cb, *target);
             },
             Insn::Jl(target) => {
@@ -916,12 +894,6 @@ impl Assembler
             Insn::Jbe(target) => {
                 emit_conditional_jump::<{Condition::LS}>(cb, *target);
             },
-            Insn::Jz(target) => {
-                emit_conditional_jump::<{Condition::EQ}>(cb, *target);
-            },
-            Insn::Jnz(target) => {
-                emit_conditional_jump::<{Condition::NE}>(cb, *target);
-            },
             Insn::Jo(target) => {
                 emit_conditional_jump::<{Condition::VS}>(cb, *target);
             },
@@ -1053,6 +1025,28 @@ mod tests {
         asm.compile_with_num_regs(&mut cb, 0);
     }

+    #[test]
+    fn test_emit_je_fits_into_bcond() {
+        let (mut asm, mut cb) = setup_asm();
+
+        let offset = 80;
+        let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into();
+
+        asm.je(Target::CodePtr(target));
+        asm.compile_with_num_regs(&mut cb, 0);
+    }
+
+    #[test]
+    fn test_emit_je_does_not_fit_into_bcond() {
+        let (mut asm, mut cb) = setup_asm();
+
+        let offset = 1 << 21;
+        let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into();
+
+        asm.je(Target::CodePtr(target));
+        asm.compile_with_num_regs(&mut cb, 0);
+    }
+
     #[test]
     fn test_emit_lea_label() {
         let (mut asm, mut cb) = setup_asm();