YJIT: Reduce paddings if --yjit-exec-mem-size <= 128 on arm64 (#7671)
* YJIT: Reduce paddings if --yjit-exec-mem-size <= 128 on arm64
* YJIT: Define jmp_ptr_bytes on CodeBlock
parent 8c360ce713
commit 7297374c5e

Notes (git):
    2023-04-11 15:03:14 +00:00
    Merged-By: maximecb <maximecb@ruby-lang.org>
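The core of the change: the per-architecture `JMP_PTR_BYTES` constants become a `CodeBlock::jmp_ptr_bytes()` method. On arm64, a `b` instruction encodes its target as a signed 26-bit word offset (imm26 * 4 bytes), giving it a reach of +/-128MiB; when the whole executable region fits in that range, a worst-case patchable jump needs 1 instruction instead of 5, so every jump site can reserve less padding. A minimal sketch of that reachability argument (helper names are illustrative, not YJIT's actual code):

    // Sketch of the reachability check behind this commit: an arm64 `b`
    // instruction encodes its target as a signed 26-bit word offset
    // (imm26 * 4 bytes), i.e. a reach of +/-128MiB.
    fn fits_in_signed_bits(imm: i64, num_bits: u32) -> bool {
        let limit = 1_i64 << (num_bits - 1);
        (-limit..limit).contains(&imm)
    }

    fn worst_case_jmp_bytes(virtual_region_size: i64) -> i64 {
        // Any jump within the region spans fewer bytes than the region size,
        // so checking the region size (in 4-byte words) is a safe worst case.
        if fits_in_signed_bits(virtual_region_size / 4, 26) {
            4 // a single `b` instruction reaches every possible target
        } else {
            20 // 4 instructions to load a 64-bit absolute address + `br`
        }
    }

    fn main() {
        let mib = 1024 * 1024_i64;
        assert_eq!(worst_case_jmp_bytes(64 * mib), 4); // within +/-128MiB reach
        assert_eq!(worst_case_jmp_bytes(256 * mib), 20); // needs far jumps
    }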
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
@@ -2,10 +2,6 @@ use std::cell::RefCell;
 use std::fmt;
 use std::mem;
 use std::rc::Rc;
-#[cfg(target_arch = "x86_64")]
-use crate::backend::x86_64::JMP_PTR_BYTES;
-#[cfg(target_arch = "aarch64")]
-use crate::backend::arm64::JMP_PTR_BYTES;
 use crate::core::IseqPayload;
 use crate::core::for_each_off_stack_iseq_payload;
 use crate::core::for_each_on_stack_iseq_payload;
@@ -123,7 +119,7 @@ impl CodeBlock {
             page_size,
             write_pos: 0,
             past_page_bytes: 0,
-            page_end_reserve: JMP_PTR_BYTES,
+            page_end_reserve: 0,
             label_addrs: Vec::new(),
             label_names: Vec::new(),
             label_refs: Vec::new(),
@@ -133,6 +129,7 @@ impl CodeBlock {
             dropped_bytes: false,
             freed_pages,
         };
+        cb.page_end_reserve = cb.jmp_ptr_bytes();
         cb.write_pos = cb.page_start();
         cb
     }
@@ -196,7 +193,7 @@ impl CodeBlock {
         self.write_pos = dst_pos;
         let dst_ptr = self.get_write_ptr();
         self.write_pos = src_pos;
-        self.without_page_end_reserve(|cb| assert!(cb.has_capacity(JMP_PTR_BYTES)));
+        self.without_page_end_reserve(|cb| assert!(cb.has_capacity(cb.jmp_ptr_bytes())));

         // Generate jmp_ptr from src_pos to dst_pos
         self.without_page_end_reserve(|cb| {
@@ -242,6 +239,11 @@ impl CodeBlock {
         self.mem_block.borrow().mapped_region_size()
     }

+    /// Size of the region in bytes where writes could be attempted.
+    pub fn virtual_region_size(&self) -> usize {
+        self.mem_block.borrow().virtual_region_size()
+    }
+
     /// Return the number of code pages that have been mapped by the VirtualMemory.
     pub fn num_mapped_pages(&self) -> usize {
         // CodeBlock's page size != VirtualMem's page size on Linux,
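Why the new accessor uses the virtual rather than the mapped size: `mapped_region_size()` only covers pages committed so far, while the padding math needs the worst case over the whole reservation, since a patched jump may later target any address in it. A schematic of the distinction (hypothetical struct, not YJIT's `VirtualMem`):

    // Schematic of the two sizes (hypothetical struct, not YJIT's VirtualMem):
    // jump padding must be sized against the full reservation, because a later
    // patch may redirect a jump anywhere in it, whether mapped yet or not.
    struct Region {
        mapped_bytes: usize,   // pages already committed for writing
        reserved_bytes: usize, // the whole virtual address reservation
    }

    impl Region {
        fn mapped_region_size(&self) -> usize { self.mapped_bytes }
        fn virtual_region_size(&self) -> usize { self.reserved_bytes }
    }

    fn main() {
        let r = Region { mapped_bytes: 1 << 20, reserved_bytes: 128 << 20 };
        // Sizing against mapped_region_size() would underestimate the worst case.
        assert!(r.virtual_region_size() >= r.mapped_region_size());
    }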
@@ -287,7 +289,7 @@ impl CodeBlock {
        if cfg!(debug_assertions) && !cfg!(test) {
            // Leave illegal instructions at the beginning of each page to assert
            // we're not accidentally crossing page boundaries.
-           start += JMP_PTR_BYTES;
+           start += self.jmp_ptr_bytes();
        }
        start
    }
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
@@ -2,6 +2,7 @@
 #![allow(unused_variables)]
 #![allow(unused_imports)]

+use crate::asm::x86_64::jmp_ptr;
 use crate::asm::{CodeBlock};
 use crate::asm::arm64::*;
 use crate::codegen::{JITState, CodegenGlobals};
@@ -38,8 +39,25 @@ pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
 pub const C_SP_REG: A64Opnd = X31;
 pub const C_SP_STEP: i32 = 16;

-// The number of bytes that are generated by emit_jmp_ptr
-pub const JMP_PTR_BYTES: usize = 20;
+impl CodeBlock {
+    // The maximum number of bytes that can be generated by emit_jmp_ptr.
+    pub fn jmp_ptr_bytes(&self) -> usize {
+        // b instruction's offset is encoded as imm26 times 4. It can jump to
+        // +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128.
+        let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) {
+            1 // b instruction
+        } else {
+            5 // 4 instructions to load a 64-bit absolute address + br instruction
+        };
+        num_insns * 4
+    }
+
+    // The maximum number of instructions that can be generated by emit_conditional_jump.
+    fn conditional_jump_insns(&self) -> i32 {
+        // The worst case is instructions for a jump + bcond.
+        self.jmp_ptr_bytes() as i32 / 4 + 1
+    }
+}

 /// Map Opnd to A64Opnd
 impl From<Opnd> for A64Opnd {
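A quick worked check of the padding arithmetic in those two methods (a toy driver with the values named in the diff; the loop itself is illustrative):

    // A conditional jump pads to (jmp_ptr_bytes / 4) + 1 instructions: the
    // near case needs 2 (bcond + b), the far case keeps the previous 6
    // (bcond + 4-instruction address load + br).
    fn main() {
        for (jmp_ptr_bytes, expected_insns) in [(4_i32, 2_i32), (20, 6)] {
            let conditional_jump_insns = jmp_ptr_bytes / 4 + 1;
            assert_eq!(conditional_jump_insns, expected_insns);
            println!(
                "jmp_ptr padding: {jmp_ptr_bytes} bytes; \
                 conditional jump padding: {conditional_jump_insns} instructions"
            );
        }
    }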
@@ -110,7 +128,8 @@ fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
         // Make sure it's always a consistent number of
         // instructions in case it gets patched and has to
         // use the other branch.
-        for _ in num_insns..(JMP_PTR_BYTES / 4) {
+        assert!(num_insns * 4 <= cb.jmp_ptr_bytes());
+        for _ in num_insns..(cb.jmp_ptr_bytes() / 4) {
             nop(cb);
         }
     }
@@ -697,6 +716,18 @@ impl Assembler
                // Here we're going to return 1 because we've only
                // written out 1 instruction.
                1
+            } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond
+                // If the jump offset fits into the unconditional jump as
+                // an immediate value, we can use inverse b.cond + b.
+                //
+                // We're going to write out the inverse condition so
+                // that if it doesn't match it will skip over the
+                // instruction used for branching.
+                bcond(cb, Condition::inverse(CONDITION), 2.into());
+                b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond
+
+                // We've only written out 2 instructions.
+                2
             } else {
                 // Otherwise, we need to load the address into a
                 // register and use the branch register instruction.
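The `2.into()` offset in the new branch arm is measured in instructions: the inverted `b.cond` skips forward two instructions, i.e. past the following `b`, whenever the original condition fails. A toy simulation of that control flow (purely illustrative, no YJIT types):

    // Toy simulation of the inverse-condition sequence emitted above:
    //   insn 0: b.<inverse cond> #2   ; taken (skips the `b`) when cond fails
    //   insn 1: b #<target>           ; single-instruction near jump
    //   insn 2: fall-through code
    fn lands_on_target(cond_holds: bool) -> bool {
        // The inverted b.cond is taken exactly when the original condition
        // fails; its offset of 2 instructions lands on insn 2, past the `b`.
        let pc = if cond_holds { 1 } else { 2 };
        pc == 1 // insn 1 is the unconditional `b` to the real target
    }

    fn main() {
        assert!(lands_on_target(true));   // condition holds -> near jump taken
        assert!(!lands_on_target(false)); // condition fails -> skip to insn 2
    }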
@@ -720,7 +751,8 @@ impl Assembler
                // We need to make sure we have at least 6 instructions for
                // every kind of jump for invalidation purposes, so we're
                // going to write out padding nop instructions here.
-               for _ in num_insns..6 { nop(cb); }
+               assert!(num_insns <= cb.conditional_jump_insns());
+               for _ in num_insns..cb.conditional_jump_insns() { nop(cb); }
            }
        },
        Target::Label(label_idx) => {
@@ -1063,7 +1095,7 @@ impl Assembler
            Insn::RegTemps(_) |
            Insn::SpillTemp(_) => (), // just a reg alloc signal, no code
            Insn::PadInvalPatch => {
-               while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES && !cb.has_dropped_bytes() {
+               while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() {
                    nop(cb);
                }
            }
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
@@ -34,8 +34,10 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
 pub const C_RET_REG: Reg = RAX_REG;
 pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);

-// The number of bytes that are generated by jmp_ptr
-pub const JMP_PTR_BYTES: usize = 6;
+impl CodeBlock {
+    // The number of bytes that are generated by jmp_ptr
+    pub fn jmp_ptr_bytes(&self) -> usize { 6 }
+}

 /// Map Opnd to X86Opnd
 impl From<Opnd> for X86Opnd {
@@ -718,8 +720,8 @@ impl Assembler
            Insn::SpillTemp(_) => (), // just a reg alloc signal, no code
            Insn::PadInvalPatch => {
                let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
-               if code_size < JMP_PTR_BYTES {
-                   nop(cb, (JMP_PTR_BYTES - code_size) as u32);
+               if code_size < cb.jmp_ptr_bytes() {
+                   nop(cb, (cb.jmp_ptr_bytes() - code_size) as u32);
                }
            }
