YJIT: implement variable-length context encoding scheme (#10888)

* Implement BitVector data structure for variable-length context encoding

* Rename method to make intent clearer

* Rename write_uint => push_uint to make intent clearer

* Implement debug trait for BitVector

* Fix bug in BitVector::read_uint_at(), enable more tests

* Add one more test for good measure

* Start sketching Context::encode()

* Progress on variable length context encoding

* Add tests. Fix bug.

* Encode stack state

* Add comments. Try to estimate context encoding size.

* More compact encoding for stack size

* Commit before rebase

* Change Context::encode() to take a BitVector as input

* Refactor BitVector::read_uint(), add helper read functions

* Implement Context::decode() function. Add test.

* Fix bug, add tests

* Rename methods

* Add Context::encode() and decode() methods using global data

* Make encode and decode methods use u32 indices

* Refactor YJIT to use variable-length context encoding

* Tag functions as allow unused

* Add a simple caching mechanism and stats for bytes per context etc

* Add comments, fix formatting

* Grow vector of bytes by 1.2x instead of 2x

* Add debug assert to check round-trip encoding-decoding

* Take some rustfmt formatting

* Add decoded_from field to Context to reuse previous encodings

* Remove old context stats

* Re-add stack_size assert

* Disable decoded_from optimization for now
Maxime Chevalier-Boisvert 2024-06-07 16:26:14 -04:00 committed by GitHub
parent faad2bc6e1
commit 425e630ce7
5 changed files with 623 additions and 66 deletions
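
The scheme appends each Context to one shared BitVector as a variable-length bit string; blocks then store a u32 bit offset into that vector instead of a full 23-byte Context struct. The sketch below is a minimal, standalone illustration of the bit-packing idea, mirroring only the push_uint/read_uint shape of the BitVector added further down in this diff; the MiniBitVector name and its simple per-bit loops are illustrative stand-ins, not the committed implementation.

// A simplified, per-bit version of the push/read scheme, for illustration only
struct MiniBitVector {
    bytes: Vec<u8>,   // packed storage, least significant bit first within each byte
    num_bits: usize,  // number of bits currently in use
}

impl MiniBitVector {
    fn new() -> Self {
        Self { bytes: Vec::new(), num_bits: 0 }
    }

    // Append the low `num_bits` bits of `val`
    fn push_uint(&mut self, val: u64, num_bits: usize) {
        assert!(num_bits <= 64);
        for i in 0..num_bits {
            if self.num_bits % 8 == 0 {
                self.bytes.push(0);
            }
            let bit = ((val >> i) & 1) as u8;
            self.bytes[self.num_bits / 8] |= bit << (self.num_bits % 8);
            self.num_bits += 1;
        }
    }

    // Read `num_bits` bits starting at `*bit_idx`, advancing the index
    fn read_uint(&self, bit_idx: &mut usize, num_bits: usize) -> u64 {
        let mut out = 0u64;
        for i in 0..num_bits {
            let idx = *bit_idx + i;
            let bit = (self.bytes[idx / 8] >> (idx % 8)) & 1;
            out |= (bit as u64) << i;
        }
        *bit_idx += num_bits;
        out
    }
}

fn main() {
    // Mimic the compact stack encoding used by Context::encode_into() below:
    // a 1-bit flag, a 2-bit stack size, then an 8-bit reg_temps bitmap
    let mut bits = MiniBitVector::new();
    bits.push_uint(1, 1);
    bits.push_uint(2, 2);
    bits.push_uint(0b0000_0001, 8);

    let mut idx = 0;
    assert_eq!(bits.read_uint(&mut idx, 1), 1);
    assert_eq!(bits.read_uint(&mut idx, 2), 2);
    assert_eq!(bits.read_uint(&mut idx, 8), 1);
    println!("{} bits stored in {} byte(s)", bits.num_bits, bits.bytes.len());
}
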

yjit.c

@ -1245,7 +1245,7 @@ rb_yjit_set_exception_return(rb_control_frame_t *cfp, void *leave_exit, void *le
VALUE rb_yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_print_stats_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self, VALUE context);
VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_disasm_iseq(rb_execution_context_t *ec, VALUE self, VALUE iseq);
VALUE rb_yjit_insns_compiled(rb_execution_context_t *ec, VALUE self, VALUE iseq);

yjit.rb

@ -155,8 +155,8 @@ module RubyVM::YJIT
# Return a hash for statistics generated for the `--yjit-stats` command line option.
# Return `nil` when option is not passed or unavailable.
def self.runtime_stats(context: false)
stats = Primitive.rb_yjit_get_stats(context)
def self.runtime_stats()
stats = Primitive.rb_yjit_get_stats()
return stats if stats.nil?
stats[:object_shape_count] = Primitive.object_shape_count
@ -313,7 +313,7 @@ module RubyVM::YJIT
# Format and print out counters
def _print_stats(out: $stderr) # :nodoc:
stats = runtime_stats(context: true)
stats = runtime_stats()
return unless Primitive.rb_yjit_stats_enabled_p
out.puts("***YJIT: Printing YJIT statistics on exit***")
@ -388,8 +388,12 @@ module RubyVM::YJIT
out.puts "freed_code_size: " + format_number(13, stats[:freed_code_size])
out.puts "yjit_alloc_size: " + format_number(13, stats[:yjit_alloc_size]) if stats.key?(:yjit_alloc_size)
out.puts "live_context_size: " + format_number(13, stats[:live_context_size])
out.puts "live_context_count: " + format_number(13, stats[:live_context_count])
bytes_per_context = stats[:context_data_bytes].fdiv(stats[:num_contexts_encoded])
out.puts "context_data_bytes: " + format_number(13, stats[:context_data_bytes])
out.puts "num_contexts_encoded: " + format_number(13, stats[:num_contexts_encoded])
out.puts "bytes_per_context: " + ("%13.2f" % bytes_per_context)
out.puts "live_page_count: " + format_number(13, stats[:live_page_count])
out.puts "freed_page_count: " + format_number(13, stats[:freed_page_count])
out.puts "code_gc_count: " + format_number(13, stats[:code_gc_count])


@ -5789,7 +5789,7 @@ fn jit_rb_str_getbyte(
RUBY_OFFSET_RSTRING_LEN as i32,
);
// Exit if the indes is out of bounds
// Exit if the index is out of bounds
asm.cmp(idx, str_len_opnd);
asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds));
@ -10333,6 +10333,9 @@ fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {
/// Global state needed for code generation
pub struct CodegenGlobals {
/// Flat vector of bits to store compressed context data
context_data: BitVector,
/// Inline code block (fast path)
inline_cb: CodeBlock,
@ -10448,6 +10451,7 @@ impl CodegenGlobals {
ocb.unwrap().mark_all_executable();
let codegen_globals = CodegenGlobals {
context_data: BitVector::new(),
inline_cb: cb,
outlined_cb: ocb,
leave_exit_code,
@ -10476,6 +10480,11 @@ impl CodegenGlobals {
unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
}
/// Get a mutable reference to the context data
pub fn get_context_data() -> &'static mut BitVector {
&mut CodegenGlobals::get_instance().context_data
}
/// Get a mutable reference to the inline code block
pub fn get_inline_cb() -> &'static mut CodeBlock {
&mut CodegenGlobals::get_instance().inline_cb


@ -457,8 +457,13 @@ const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63
/// Contains information we can use to specialize/optimize code
/// There are a lot of context objects so we try to keep the size small.
#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
#[repr(packed)]
pub struct Context {
// FIXME: decoded_from breaks == on contexts
/*
// Offset at which this context was previously encoded (zero if not)
decoded_from: u32,
*/
// Number of values currently on the temporary stack
stack_size: u8,
@ -498,6 +503,568 @@ pub struct Context {
inline_block: u64,
}
#[derive(Clone)]
pub struct BitVector {
// Flat vector of bytes to write into
bytes: Vec<u8>,
// Number of bits in use out of the bits allocated in `bytes`
num_bits: usize,
}
impl BitVector {
pub fn new() -> Self {
Self {
bytes: Vec::with_capacity(4096),
num_bits: 0,
}
}
#[allow(unused)]
pub fn num_bits(&self) -> usize {
self.num_bits
}
// Total number of bytes taken
#[allow(unused)]
pub fn num_bytes(&self) -> usize {
(self.num_bits / 8) + if (self.num_bits % 8) != 0 { 1 } else { 0 }
}
// Write/append an unsigned integer value
fn push_uint(&mut self, mut val: u64, mut num_bits: usize) {
assert!(num_bits <= 64);
// Mask out bits above the number of bits requested
let mut val_bits = val;
if num_bits < 64 {
val_bits &= (1 << num_bits) - 1;
assert!(val == val_bits);
}
// Number of bits encoded in the last byte
let rem_bits = self.num_bits % 8;
// Encode as many bits as we can in this last byte
if rem_bits != 0 {
let num_enc = std::cmp::min(num_bits, 8 - rem_bits);
let bit_mask = (1 << num_enc) - 1;
let frac_bits = (val & bit_mask) << rem_bits;
let frac_bits: u8 = frac_bits.try_into().unwrap();
let last_byte_idx = self.bytes.len() - 1;
self.bytes[last_byte_idx] |= frac_bits;
self.num_bits += num_enc;
num_bits -= num_enc;
val >>= num_enc;
}
// While we have bits left to encode
while num_bits > 0 {
// Grow with a 1.2x growth factor instead of 2x
assert!(self.num_bits % 8 == 0);
let num_bytes = self.num_bits / 8;
if num_bytes == self.bytes.capacity() {
self.bytes.reserve_exact(self.bytes.len() / 5);
}
let bits = val & 0xFF;
let bits: u8 = bits.try_into().unwrap();
self.bytes.push(bits);
let bits_to_encode = std::cmp::min(num_bits, 8);
self.num_bits += bits_to_encode;
num_bits -= bits_to_encode;
val >>= bits_to_encode;
}
}
fn push_u8(&mut self, val: u8) {
self.push_uint(val as u64, 8);
}
fn push_u4(&mut self, val: u8) {
assert!(val < 16);
self.push_uint(val as u64, 4);
}
fn push_u3(&mut self, val: u8) {
assert!(val < 8);
self.push_uint(val as u64, 3);
}
fn push_u2(&mut self, val: u8) {
assert!(val < 4);
self.push_uint(val as u64, 2);
}
fn push_u1(&mut self, val: u8) {
assert!(val < 2);
self.push_uint(val as u64, 1);
}
// Push a context encoding opcode
fn push_op(&mut self, op: CtxOp) {
self.push_u4(op as u8);
}
// Read a uint value at a given bit index
// The bit index is incremented after the value is read
fn read_uint(&self, bit_idx: &mut usize, mut num_bits: usize) -> u64 {
let start_bit_idx = *bit_idx;
let mut cur_idx = *bit_idx;
// Read the bits in the first byte
let bit_mod = cur_idx % 8;
let bits_in_byte = self.bytes[cur_idx / 8] >> bit_mod;
let num_bits_in_byte = std::cmp::min(num_bits, 8 - bit_mod);
cur_idx += num_bits_in_byte;
num_bits -= num_bits_in_byte;
let mut out_bits = (bits_in_byte as u64) & ((1 << num_bits_in_byte) - 1);
// While we have bits left to read
while num_bits > 0 {
let num_bits_in_byte = std::cmp::min(num_bits, 8);
assert!(cur_idx % 8 == 0);
let byte = self.bytes[cur_idx / 8] as u64;
let bits_in_byte = byte & ((1 << num_bits) - 1);
out_bits |= bits_in_byte << (cur_idx - start_bit_idx);
// Move to the next byte/offset
cur_idx += num_bits_in_byte;
num_bits -= num_bits_in_byte;
}
// Update the read index
*bit_idx = cur_idx;
out_bits
}
fn read_u8(&self, bit_idx: &mut usize) -> u8 {
self.read_uint(bit_idx, 8) as u8
}
fn read_u4(&self, bit_idx: &mut usize) -> u8 {
self.read_uint(bit_idx, 4) as u8
}
fn read_u3(&self, bit_idx: &mut usize) -> u8 {
self.read_uint(bit_idx, 3) as u8
}
fn read_u2(&self, bit_idx: &mut usize) -> u8 {
self.read_uint(bit_idx, 2) as u8
}
fn read_u1(&self, bit_idx: &mut usize) -> u8 {
self.read_uint(bit_idx, 1) as u8
}
fn read_op(&self, bit_idx: &mut usize) -> CtxOp {
unsafe { std::mem::transmute(self.read_u4(bit_idx)) }
}
}
impl fmt::Debug for BitVector {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// We print the higher bytes first
for (idx, byte) in self.bytes.iter().enumerate().rev() {
write!(f, "{:08b}", byte)?;
// Insert a separator between each byte
if idx > 0 {
write!(f, "|")?;
}
}
Ok(())
}
}
#[cfg(test)]
mod bitvector_tests {
use super::*;
#[test]
fn write_3() {
let mut arr = BitVector::new();
arr.push_uint(3, 2);
assert!(arr.read_uint(&mut 0, 2) == 3);
}
#[test]
fn write_11() {
let mut arr = BitVector::new();
arr.push_uint(1, 1);
arr.push_uint(1, 1);
assert!(arr.read_uint(&mut 0, 2) == 3);
}
#[test]
fn write_11_overlap() {
let mut arr = BitVector::new();
arr.push_uint(0, 7);
arr.push_uint(3, 2);
arr.push_uint(1, 1);
//dbg!(arr.read_uint(7, 2));
assert!(arr.read_uint(&mut 7, 2) == 3);
}
#[test]
fn write_ff_0() {
let mut arr = BitVector::new();
arr.push_uint(0xFF, 8);
assert!(arr.read_uint(&mut 0, 8) == 0xFF);
}
#[test]
fn write_ff_3() {
// Write 0xFF at bit index 3
let mut arr = BitVector::new();
arr.push_uint(0, 3);
arr.push_uint(0xFF, 8);
assert!(arr.read_uint(&mut 3, 8) == 0xFF);
}
#[test]
fn write_ff_sandwich() {
// Write 0xFF sandwiched between zeros
let mut arr = BitVector::new();
arr.push_uint(0, 3);
arr.push_u8(0xFF);
arr.push_uint(0, 3);
assert!(arr.read_uint(&mut 3, 8) == 0xFF);
}
#[test]
fn write_read_u32_max() {
let mut arr = BitVector::new();
arr.push_uint(0xFF_FF_FF_FF, 32);
assert!(arr.read_uint(&mut 0, 32) == 0xFF_FF_FF_FF);
}
#[test]
fn write_read_u32_max_64b() {
let mut arr = BitVector::new();
arr.push_uint(0xFF_FF_FF_FF, 64);
assert!(arr.read_uint(&mut 0, 64) == 0xFF_FF_FF_FF);
}
#[test]
fn write_read_u64_max() {
let mut arr = BitVector::new();
arr.push_uint(u64::MAX, 64);
assert!(arr.read_uint(&mut 0, 64) == u64::MAX);
}
#[test]
fn encode_default() {
let mut bits = BitVector::new();
let ctx = Context::default();
let start_idx = ctx.encode_into(&mut bits);
assert!(start_idx == 0);
assert!(bits.num_bits() > 0);
assert!(bits.num_bytes() > 0);
// Make sure that the round trip matches the input
let ctx2 = Context::decode_from(&bits, 0);
assert!(ctx2 == ctx);
}
#[test]
fn encode_default_2x() {
let mut bits = BitVector::new();
let ctx0 = Context::default();
let idx0 = ctx0.encode_into(&mut bits);
let mut ctx1 = Context::default();
ctx1.reg_temps = RegTemps(1);
let idx1 = ctx1.encode_into(&mut bits);
// Make sure that we can encode two contexts successively
let ctx0_dec = Context::decode_from(&bits, idx0);
let ctx1_dec = Context::decode_from(&bits, idx1);
assert!(ctx0_dec == ctx0);
assert!(ctx1_dec == ctx1);
}
#[test]
fn regress_reg_temps() {
let mut bits = BitVector::new();
let mut ctx = Context::default();
ctx.reg_temps = RegTemps(1);
ctx.encode_into(&mut bits);
let b0 = bits.read_u1(&mut 0);
assert!(b0 == 1);
// Make sure that the round trip matches the input
let ctx2 = Context::decode_from(&bits, 0);
assert!(ctx2 == ctx);
}
}
// Context encoding opcodes (4 bits)
#[derive(Debug, Copy, Clone)]
#[repr(u8)]
enum CtxOp {
// Self type (4 bits)
SetSelfType = 0,
// Local idx (3 bits), temp type (4 bits)
SetLocalType,
// Map a stack temp to the stack with a known type
// Temp idx (3 bits), known type (4 bits)
SetTempType,
// Map stack temp to a local variable
// Temp idx (3 bits), local idx (3 bits)
MapTempLocal,
// Map a stack temp to self
// Temp idx (3 bits)
MapTempSelf,
// Set inline block pointer (8 bytes)
SetInlineBlock,
// End of encoding
EndOfCode,
}
// Cache of the last context encoded
// Empirically this saves a few percent of memory
// We can experiment with varying the size of this cache
static mut LAST_CTX_ENCODED: Option<(Context, u32)> = None;
impl Context {
pub fn encode(&self) -> u32 {
incr_counter!(num_contexts_encoded);
if *self == Context::default() {
return 0;
}
/*
// If this context was previously decoded and was not changed since
if self.decoded_from != 0 && Self::decode(self.decoded_from) == *self {
return self.decoded_from;
}
*/
// If this context was recently encoded (cache check)
unsafe {
if let Some((ctx, idx)) = LAST_CTX_ENCODED {
if ctx == *self {
return idx;
}
}
}
let context_data = CodegenGlobals::get_context_data();
// Offset 0 is reserved for the default context
if context_data.num_bits() == 0 {
context_data.push_u1(0);
}
let idx = self.encode_into(context_data);
let idx: u32 = idx.try_into().unwrap();
unsafe {
LAST_CTX_ENCODED = Some((*self, idx));
}
// In debug mode, check that the round-trip decoding always matches
debug_assert!(Self::decode(idx) == *self);
idx
}
pub fn decode(start_idx: u32) -> Context {
if start_idx == 0 {
return Context::default();
};
let context_data = CodegenGlobals::get_context_data();
let ctx = Self::decode_from(context_data, start_idx as usize);
// Keep track of the fact that this context was previously encoded
//ctx.decoded_from = start_idx;
ctx
}
// Encode into a compressed context representation in a bit vector
fn encode_into(&self, bits: &mut BitVector) -> usize {
let start_idx = bits.num_bits();
// NOTE: this value is often zero or falls within
// a small range, so could be compressed
//println!("stack_size={}", self.stack_size);
//println!("sp_offset={}", self.sp_offset);
//println!("chain_depth_and_flags={}", self.chain_depth_and_flags);
// Most of the time, the stack size is small and sp offset has the same value
if (self.stack_size as i64) == (self.sp_offset as i64) && self.stack_size < 4 {
// One single bit to signify a compact stack_size/sp_offset encoding
bits.push_u1(1);
bits.push_u2(self.stack_size);
} else {
// Full stack size encoding
bits.push_u1(0);
// Number of values currently on the temporary stack
bits.push_u8(self.stack_size);
// sp_offset: i8,
bits.push_u8(self.sp_offset as u8);
}
// Bitmap of which stack temps are in a register
let RegTemps(reg_temps) = self.reg_temps;
bits.push_u8(reg_temps);
// chain_depth_and_flags: u8,
bits.push_u8(self.chain_depth_and_flags);
// Encode the self type if known
if self.self_type != Type::Unknown {
bits.push_op(CtxOp::SetSelfType);
bits.push_u4(self.self_type as u8);
}
// Encode the local types if known
for local_idx in 0..MAX_LOCAL_TYPES {
let t = self.get_local_type(local_idx);
if t != Type::Unknown {
bits.push_op(CtxOp::SetLocalType);
bits.push_u3(local_idx as u8);
bits.push_u4(t as u8);
}
}
// Encode stack temps
for stack_idx in 0..MAX_TEMP_TYPES {
let mapping = self.get_temp_mapping(stack_idx);
match mapping.get_kind() {
MapToStack => {
let t = mapping.get_type();
if t != Type::Unknown {
// Temp idx (3 bits), known type (4 bits)
bits.push_op(CtxOp::SetTempType);
bits.push_u3(stack_idx as u8);
bits.push_u4(t as u8);
}
}
MapToLocal => {
// Temp idx (3 bits), local idx (3 bits)
let local_idx = mapping.get_local_idx();
bits.push_op(CtxOp::MapTempLocal);
bits.push_u3(stack_idx as u8);
bits.push_u3(local_idx as u8);
}
MapToSelf => {
// Temp idx (3 bits)
bits.push_op(CtxOp::MapTempSelf);
bits.push_u3(stack_idx as u8);
}
}
}
// Inline block pointer
if self.inline_block != 0 {
bits.push_op(CtxOp::SetInlineBlock);
bits.push_uint(self.inline_block, 64);
}
// TODO: should we add an op for end-of-encoding,
// or store num ops at the beginning?
bits.push_op(CtxOp::EndOfCode);
start_idx
}
// Decode a compressed context representation from a bit vector
fn decode_from(bits: &BitVector, start_idx: usize) -> Context {
let mut ctx = Context::default();
let mut idx = start_idx;
// Small vs large stack size encoding
if bits.read_u1(&mut idx) == 1 {
ctx.stack_size = bits.read_u2(&mut idx);
ctx.sp_offset = ctx.stack_size as i8;
} else {
ctx.stack_size = bits.read_u8(&mut idx);
ctx.sp_offset = bits.read_u8(&mut idx) as i8;
}
// Bitmap of which stack temps are in a register
ctx.reg_temps = RegTemps(bits.read_u8(&mut idx));
// chain_depth_and_flags: u8
ctx.chain_depth_and_flags = bits.read_u8(&mut idx);
loop {
//println!("reading op");
let op = bits.read_op(&mut idx);
//println!("got op {:?}", op);
match op {
CtxOp::SetSelfType => {
ctx.self_type = unsafe { transmute(bits.read_u4(&mut idx)) };
}
CtxOp::SetLocalType => {
let local_idx = bits.read_u3(&mut idx) as usize;
let t = unsafe { transmute(bits.read_u4(&mut idx)) };
ctx.set_local_type(local_idx, t);
}
// Map temp to stack (known type)
CtxOp::SetTempType => {
let temp_idx = bits.read_u3(&mut idx) as usize;
let t = unsafe { transmute(bits.read_u4(&mut idx)) };
ctx.set_temp_mapping(temp_idx, TempMapping::map_to_stack(t));
}
// Map temp to local
CtxOp::MapTempLocal => {
let temp_idx = bits.read_u3(&mut idx) as usize;
let local_idx = bits.read_u3(&mut idx);
ctx.set_temp_mapping(temp_idx, TempMapping::map_to_local(local_idx));
}
// Map temp to self
CtxOp::MapTempSelf => {
let temp_idx = bits.read_u3(&mut idx) as usize;
ctx.set_temp_mapping(temp_idx, TempMapping::map_to_self());
}
// Inline block pointer
CtxOp::SetInlineBlock => {
ctx.inline_block = bits.read_uint(&mut idx, 64);
}
CtxOp::EndOfCode => break,
}
}
ctx
}
}
/// Tuple of (iseq, idx) used to identify basic blocks
/// There are a lot of blockid objects so we try to keep the size small.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
@ -659,7 +1226,7 @@ impl BranchTarget {
}
}
fn get_ctx(&self) -> Context {
fn get_ctx(&self) -> u32 {
match self {
BranchTarget::Stub(stub) => stub.ctx,
BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx,
@ -686,7 +1253,7 @@ struct BranchStub {
address: Option<CodePtr>,
iseq: Cell<IseqPtr>,
iseq_idx: IseqIdx,
ctx: Context,
ctx: u32,
}
/// Store info about an outgoing branch in a code segment
@ -808,6 +1375,9 @@ impl PendingBranch {
return Some(block.start_addr);
}
// Compress/encode the context
let ctx = Context::encode(ctx);
// The branch struct is uninitialized right now but has a stable address.
// We make sure the stub runs after the branch is initialized.
let branch_struct_addr = self.uninit_branch.as_ptr() as usize;
@ -819,7 +1389,7 @@ impl PendingBranch {
address: Some(stub_addr),
iseq: Cell::new(target.iseq),
iseq_idx: target.idx,
ctx: *ctx,
ctx,
})))));
}
@ -912,7 +1482,7 @@ pub struct Block {
// Context at the start of the block
// This should never be mutated
ctx: Context,
ctx: u32,
// Positions where the generated code starts and ends
start_addr: CodePtr,
@ -1085,15 +1655,6 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
}
/// Iterate over all ISEQ payloads
pub fn for_each_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
for_each_iseq(|iseq| {
if let Some(iseq_payload) = get_iseq_payload(iseq) {
callback(iseq_payload);
}
});
}
/// Iterate over all on-stack ISEQs
pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
@ -1425,13 +1986,17 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
// FIXME: this counting logic is going to be expensive.
// We should avoid it if possible
Some(payload) => {
payload
.version_map
.get(insn_idx)
.map(|versions| {
versions.iter().filter(|&&version|
unsafe { version.as_ref() }.ctx.inline() == inlined
Context::decode(unsafe { version.as_ref() }.ctx).inline() == inlined
).count()
})
.unwrap_or(0)
@ -1476,10 +2041,11 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
// For each version matching the blockid
for blockref in versions.iter() {
let block = unsafe { blockref.as_ref() };
let block_ctx = Context::decode(block.ctx);
// Note that we always prefer the first matching
// version found because of inline-cache chains
match ctx.diff(&block.ctx) {
match ctx.diff(&block_ctx) {
TypeDiff::Compatible(diff) if diff < best_diff => {
best_version = Some(*blockref);
best_diff = diff;
@ -1561,7 +2127,7 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
let block = unsafe { blockref.as_ref() };
// Function entry blocks must have stack size 0
assert!(!(block.iseq_range.start == 0 && block.ctx.stack_size > 0));
debug_assert!(!(block.iseq_range.start == 0 && Context::decode(block.ctx).stack_size > 0));
let version_list = get_or_create_version_list(block.get_blockid());
@ -1620,12 +2186,14 @@ impl JITState {
incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len());
let ctx = Context::encode(&self.get_starting_ctx());
// Make the new block
let block = MaybeUninit::new(Block {
start_addr,
iseq: Cell::new(self.get_iseq()),
iseq_range: self.get_starting_insn_idx()..end_insn_idx,
ctx: self.get_starting_ctx(),
ctx,
end_addr: Cell::new(end_addr),
incoming: MutableBranchList(Cell::default()),
gc_obj_offsets: gc_obj_offsets.into_boxed_slice(),
@ -2382,6 +2950,7 @@ fn gen_block_series_body(
};
// Generate new block using context from the last branch.
let requested_ctx = Context::decode(requested_ctx);
let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb);
// If the block failed to compile
@ -2769,7 +3338,8 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
return target.get_address().unwrap().raw_ptr(cb);
}
(target.get_blockid(), target.get_ctx())
let target_ctx = Context::decode(target.get_ctx());
(target.get_blockid(), target_ctx)
};
let (cfp, original_interp_sp) = unsafe {
@ -2906,7 +3476,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
/// Generate a "stub", a piece of code that calls the compiler back when run.
/// A piece of code that redeems for more code; a thunk for code.
fn gen_branch_stub(
ctx: &Context,
ctx: u32,
ocb: &mut OutlinedCb,
branch_struct_address: usize,
target_idx: u32,
@ -2914,8 +3484,8 @@ fn gen_branch_stub(
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
asm.ctx = *ctx;
asm.set_reg_temps(ctx.reg_temps);
asm.ctx = Context::decode(ctx);
asm.set_reg_temps(asm.ctx.reg_temps);
asm_comment!(asm, "branch stub hit");
if asm.ctx.is_return_landing() {
@ -3112,7 +3682,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
// compile the target block right after this one (fallthrough).
BranchTarget::Stub(Box::new(BranchStub {
address: None,
ctx: *ctx,
ctx: Context::encode(ctx),
iseq: Cell::new(target0.iseq),
iseq_idx: target0.idx,
}))
@ -3364,7 +3934,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
}
// Create a stub for this branch target
let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
let stub_addr = gen_branch_stub(block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
// In case we were unable to generate a stub (e.g. OOM). Use the block's
// exit instead of a stub for the block. It's important that we
@ -3546,11 +4116,6 @@ mod tests {
assert_eq!(t.get_local_idx(), 7);
}
#[test]
fn context_size() {
assert_eq!(mem::size_of::<Context>(), 23);
}
#[test]
fn types() {
// Valid src => dst
@ -3695,7 +4260,7 @@ mod tests {
iseq: Cell::new(ptr::null()),
iseq_idx: 0,
address: None,
ctx: Context::default(),
ctx: 0,
})))))]
};
// For easier soundness reasoning, make sure the reference returned does not out live the
@ -3728,7 +4293,7 @@ mod tests {
iseq: Cell::new(ptr::null()),
iseq_idx: 0,
address: None,
ctx: Context::default(),
ctx: 0,
})))));
// Invalid ISeq; we never dereference it.
let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr();


@ -10,8 +10,6 @@ use std::time::Instant;
use std::collections::HashMap;
use crate::codegen::CodegenGlobals;
use crate::core::Context;
use crate::core::for_each_iseq_payload;
use crate::cruby::*;
use crate::options::*;
use crate::yjit::yjit_enabled_p;
@ -557,6 +555,7 @@ make_counters! {
branch_insn_count,
branch_known_count,
max_inline_versions,
num_contexts_encoded,
freed_iseq_count,
@ -641,8 +640,8 @@ pub extern "C" fn rb_yjit_print_stats_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE
/// Primitive called in yjit.rb.
/// Export all YJIT statistics as a Ruby hash.
#[no_mangle]
pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, context: VALUE) -> VALUE {
with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(context == Qtrue))
pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict())
}
/// Primitive called in yjit.rb
@ -701,7 +700,7 @@ pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char
}
/// Export all YJIT statistics as a Ruby hash.
fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
fn rb_yjit_gen_stats_dict() -> VALUE {
// If YJIT is not enabled, return Qnil
if !yjit_enabled_p() {
return Qnil;
@ -744,14 +743,9 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
// Rust global allocations in bytes
hash_aset_usize!(hash, "yjit_alloc_size", GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst));
// `context` is true at RubyVM::YJIT._print_stats for --yjit-stats. It's false by default
// for RubyVM::YJIT.runtime_stats because counting all Contexts could be expensive.
if context {
let live_context_count = get_live_context_count();
let context_size = std::mem::size_of::<Context>();
hash_aset_usize!(hash, "live_context_count", live_context_count);
hash_aset_usize!(hash, "live_context_size", live_context_count * context_size);
}
// How many bytes we are using to store context data
let context_data = CodegenGlobals::get_context_data();
hash_aset_usize!(hash, "context_data_bytes", context_data.num_bytes());
// VM instructions count
hash_aset_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize);
@ -846,21 +840,6 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
hash
}
fn get_live_context_count() -> usize {
let mut count = 0;
for_each_iseq_payload(|iseq_payload| {
for blocks in iseq_payload.version_map.iter() {
for block in blocks.iter() {
count += unsafe { block.as_ref() }.get_ctx_count();
}
}
for block in iseq_payload.dead_blocks.iter() {
count += unsafe { block.as_ref() }.get_ctx_count();
}
});
count
}
/// Record the backtrace when a YJIT exit occurs. This functionality requires
/// that the stats feature is enabled as well as the --yjit-trace-exits option.
///