YJIT: implement variable-length context encoding scheme (#10888)
* Implement BitVector data structure for variable-length context encoding
* Rename method to make intent clearer
* Rename write_uint => push_uint to make intent clearer
* Implement debug trait for BitVector
* Fix bug in BitVector::read_uint_at(), enable more tests
* Add one more test for good measure
* Start sketching Context::encode()
* Progress on variable length context encoding
* Add tests. Fix bug.
* Encode stack state
* Add comments. Try to estimate context encoding size.
* More compact encoding for stack size
* Commit before rebase
* Change Context::encode() to take a BitVector as input
* Refactor BitVector::read_uint(), add helper read functions
* Implement Context::decode() function. Add test.
* Fix bug, add tests
* Rename methods
* Add Context::encode() and decode() methods using global data
* Make encode and decode methods use u32 indices
* Refactor YJIT to use variable-length context encoding
* Tag functions as allow unused
* Add a simple caching mechanism and stats for bytes per context etc
* Add comments, fix formatting
* Grow vector of bytes by 1.2x instead of 2x
* Add debug assert to check round-trip encoding-decoding
* Take some rustfmt formatting
* Add decoded_from field to Context to reuse previous encodings
* Remove olde context stats
* Re-add stack_size assert
* Disable decoded_from optimization for now
parent faad2bc6e1
commit 425e630ce7
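This commit replaces the fixed-size, 23-byte Context struct that was stored inline in every block and branch stub with a variable-length bit encoding appended to a single global BitVector; call sites keep only a u32 offset and decode on demand. Below is a condensed, runnable sketch of the bit-packing idea. It is illustrative only: the `Bits` type and the values in `main` are invented for this note, while the real `BitVector` appears in the yjit/src/core.rs diff further down.

// Minimal sketch of the bit-packing idea behind this commit (not the
// committed code). Values are appended back-to-back with only as many
// bits as requested, so a small field costs 1-4 bits, not a full byte.
struct Bits {
    bytes: Vec<u8>,
    num_bits: usize,
}

impl Bits {
    fn new() -> Self {
        Bits { bytes: Vec::new(), num_bits: 0 }
    }

    // Append the low `n` bits of `val` (n <= 64), LSB-first
    fn push(&mut self, mut val: u64, mut n: usize) {
        while n > 0 {
            let bit = (val & 1) as u8;
            if self.num_bits % 8 == 0 {
                self.bytes.push(0);
            }
            self.bytes[self.num_bits / 8] |= bit << (self.num_bits % 8);
            self.num_bits += 1;
            val >>= 1;
            n -= 1;
        }
    }

    // Read `n` bits starting at *idx, advancing the cursor
    fn read(&self, idx: &mut usize, n: usize) -> u64 {
        let mut out = 0u64;
        for i in 0..n {
            let bit = (self.bytes[*idx / 8] >> (*idx % 8)) & 1;
            out |= (bit as u64) << i;
            *idx += 1;
        }
        out
    }
}

fn main() {
    let mut bits = Bits::new();
    bits.push(1, 1);    // 1-bit flag
    bits.push(3, 2);    // 2-bit stack size
    bits.push(0xAB, 8); // 8-bit field
    let mut idx = 0;
    assert_eq!(bits.read(&mut idx, 1), 1);
    assert_eq!(bits.read(&mut idx, 2), 3);
    assert_eq!(bits.read(&mut idx, 8), 0xAB);
    println!("11 bits stored in {} byte(s)", bits.bytes.len()); // 2
}

As in the committed encoder, values are packed LSB-first, so the 11-bit payload above occupies two bytes instead of the three it would take with byte-aligned fields.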
yjit.c (2 changes)
@@ -1245,7 +1245,7 @@ rb_yjit_set_exception_return(rb_control_frame_t *cfp, void *leave_exit, void *le
 VALUE rb_yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self);
 VALUE rb_yjit_print_stats_p(rb_execution_context_t *ec, VALUE self);
 VALUE rb_yjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self);
-VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self, VALUE context);
+VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self);
 VALUE rb_yjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self);
 VALUE rb_yjit_disasm_iseq(rb_execution_context_t *ec, VALUE self, VALUE iseq);
 VALUE rb_yjit_insns_compiled(rb_execution_context_t *ec, VALUE self, VALUE iseq);
yjit.rb (14 changes)
@@ -155,8 +155,8 @@ module RubyVM::YJIT

   # Return a hash for statistics generated for the `--yjit-stats` command line option.
   # Return `nil` when option is not passed or unavailable.
-  def self.runtime_stats(context: false)
-    stats = Primitive.rb_yjit_get_stats(context)
+  def self.runtime_stats()
+    stats = Primitive.rb_yjit_get_stats()
     return stats if stats.nil?

     stats[:object_shape_count] = Primitive.object_shape_count
@@ -313,7 +313,7 @@ module RubyVM::YJIT

   # Format and print out counters
   def _print_stats(out: $stderr) # :nodoc:
-    stats = runtime_stats(context: true)
+    stats = runtime_stats()
     return unless Primitive.rb_yjit_stats_enabled_p

     out.puts("***YJIT: Printing YJIT statistics on exit***")
@@ -388,8 +388,12 @@ module RubyVM::YJIT

     out.puts "freed_code_size: " + format_number(13, stats[:freed_code_size])
     out.puts "yjit_alloc_size: " + format_number(13, stats[:yjit_alloc_size]) if stats.key?(:yjit_alloc_size)
-    out.puts "live_context_size: " + format_number(13, stats[:live_context_size])
-    out.puts "live_context_count: " + format_number(13, stats[:live_context_count])
+
+    bytes_per_context = stats[:context_data_bytes].fdiv(stats[:num_contexts_encoded])
+    out.puts "context_data_bytes: " + format_number(13, stats[:context_data_bytes])
+    out.puts "num_contexts_encoded: " + format_number(13, stats[:num_contexts_encoded])
+    out.puts "bytes_per_context: " + ("%13.2f" % bytes_per_context)

     out.puts "live_page_count: " + format_number(13, stats[:live_page_count])
     out.puts "freed_page_count: " + format_number(13, stats[:freed_page_count])
     out.puts "code_gc_count: " + format_number(13, stats[:code_gc_count])
yjit/src/codegen.rs

@@ -5789,7 +5789,7 @@ fn jit_rb_str_getbyte(
         RUBY_OFFSET_RSTRING_LEN as i32,
     );

-    // Exit if the indes is out of bounds
+    // Exit if the index is out of bounds
     asm.cmp(idx, str_len_opnd);
     asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds));

@@ -10333,6 +10333,9 @@ fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {

 /// Global state needed for code generation
 pub struct CodegenGlobals {
+    /// Flat vector of bits to store compressed context data
+    context_data: BitVector,
+
     /// Inline code block (fast path)
     inline_cb: CodeBlock,

@@ -10448,6 +10451,7 @@ impl CodegenGlobals {
         ocb.unwrap().mark_all_executable();

         let codegen_globals = CodegenGlobals {
+            context_data: BitVector::new(),
             inline_cb: cb,
             outlined_cb: ocb,
             leave_exit_code,
@@ -10476,6 +10480,11 @@ impl CodegenGlobals {
         unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
     }

+    /// Get a mutable reference to the context data
+    pub fn get_context_data() -> &'static mut BitVector {
+        &mut CodegenGlobals::get_instance().context_data
+    }
+
     /// Get a mutable reference to the inline code block
     pub fn get_inline_cb() -> &'static mut CodeBlock {
         &mut CodegenGlobals::get_instance().inline_cb
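The new context_data field hangs off the CodegenGlobals singleton, and get_context_data() hands out a &'static mut reference the same way get_inline_cb() does. A toy version of that accessor pattern follows; it is sound in YJIT only because globals are initialized once at boot and mutated single-threaded under the VM lock, and this sketch (with the invented `Globals` type) makes the same assumption. Note that newer toolchains flag `static mut` references with the static_mut_refs lint.

// Sketch of the global-accessor pattern used above, reduced to a toy.
struct Globals {
    context_data: Vec<u8>, // stand-in for the BitVector field
}

static mut GLOBALS: Option<Globals> = None;

impl Globals {
    fn init() {
        unsafe { GLOBALS = Some(Globals { context_data: Vec::new() }) }
    }

    fn get_instance() -> &'static mut Globals {
        unsafe { GLOBALS.as_mut().unwrap() }
    }

    // Mirrors CodegenGlobals::get_context_data() in the diff above
    fn get_context_data() -> &'static mut Vec<u8> {
        &mut Globals::get_instance().context_data
    }
}

fn main() {
    Globals::init();
    Globals::get_context_data().push(42);
    assert_eq!(Globals::get_context_data().len(), 1);
}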
yjit/src/core.rs (627 changes)
@@ -457,8 +457,13 @@ const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63
 /// Contains information we can use to specialize/optimize code
 /// There are a lot of context objects so we try to keep the size small.
 #[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
 #[repr(packed)]
 pub struct Context {
+    // FIXME: decoded_from breaks == on contexts
+    /*
+    // Offset at which this context was previously encoded (zero if not)
+    decoded_from: u32,
+    */

     // Number of values currently on the temporary stack
     stack_size: u8,
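The commented-out decoded_from field corresponds to the last two commit-message bullets: caching the encoding offset inside the struct would let encode() reuse a previous encoding, but any extra field participates in the derived Eq/Hash, so two logically identical contexts stop comparing equal. A minimal illustration of the hazard (the `Ctx` type here is invented for this note):

// Why a decoded_from field breaks ==: with a derived PartialEq, a context
// freshly built by the compiler (decoded_from == 0) no longer compares
// equal to the same logical context read back out of the bit vector
// (decoded_from == some nonzero offset).
#[derive(PartialEq, Debug, Clone, Copy, Default)]
struct Ctx {
    stack_size: u8,
    decoded_from: u32, // cache field, not logical state
}

fn main() {
    let fresh = Ctx { stack_size: 2, decoded_from: 0 };
    let decoded = Ctx { stack_size: 2, decoded_from: 17 };
    // Logically the same context, but the derived == says otherwise,
    // which would defeat block-version lookup; hence the field is
    // commented out and the optimization is disabled for now.
    assert_ne!(fresh, decoded);
}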
@@ -498,6 +503,568 @@ pub struct Context {
     inline_block: u64,
 }

+#[derive(Clone)]
+pub struct BitVector {
+    // Flat vector of bytes to write into
+    bytes: Vec<u8>,
+
+    // Number of bits taken out of bytes allocated
+    num_bits: usize,
+}
+
+impl BitVector {
+    pub fn new() -> Self {
+        Self {
+            bytes: Vec::with_capacity(4096),
+            num_bits: 0,
+        }
+    }
+
+    #[allow(unused)]
+    pub fn num_bits(&self) -> usize {
+        self.num_bits
+    }
+
+    // Total number of bytes taken
+    #[allow(unused)]
+    pub fn num_bytes(&self) -> usize {
+        (self.num_bits / 8) + if (self.num_bits % 8) != 0 { 1 } else { 0 }
+    }
+
+    // Write/append an unsigned integer value
+    fn push_uint(&mut self, mut val: u64, mut num_bits: usize) {
+        assert!(num_bits <= 64);
+
+        // Mask out bits above the number of bits requested
+        let mut val_bits = val;
+        if num_bits < 64 {
+            val_bits &= (1 << num_bits) - 1;
+            assert!(val == val_bits);
+        }
+
+        // Number of bits encoded in the last byte
+        let rem_bits = self.num_bits % 8;
+
+        // Encode as many bits as we can in this last byte
+        if rem_bits != 0 {
+            let num_enc = std::cmp::min(num_bits, 8 - rem_bits);
+            let bit_mask = (1 << num_enc) - 1;
+            let frac_bits = (val & bit_mask) << rem_bits;
+            let frac_bits: u8 = frac_bits.try_into().unwrap();
+            let last_byte_idx = self.bytes.len() - 1;
+            self.bytes[last_byte_idx] |= frac_bits;
+
+            self.num_bits += num_enc;
+            num_bits -= num_enc;
+            val >>= num_enc;
+        }
+
+        // While we have bits left to encode
+        while num_bits > 0 {
+            // Grow with a 1.2x growth factor instead of 2x
+            assert!(self.num_bits % 8 == 0);
+            let num_bytes = self.num_bits / 8;
+            if num_bytes == self.bytes.capacity() {
+                self.bytes.reserve_exact(self.bytes.len() / 5);
+            }
+
+            let bits = val & 0xFF;
+            let bits: u8 = bits.try_into().unwrap();
+            self.bytes.push(bits);
+
+            let bits_to_encode = std::cmp::min(num_bits, 8);
+            self.num_bits += bits_to_encode;
+            num_bits -= bits_to_encode;
+            val >>= bits_to_encode;
+        }
+    }
+
+    fn push_u8(&mut self, val: u8) {
+        self.push_uint(val as u64, 8);
+    }
+
+    fn push_u4(&mut self, val: u8) {
+        assert!(val < 16);
+        self.push_uint(val as u64, 4);
+    }
+
+    fn push_u3(&mut self, val: u8) {
+        assert!(val < 8);
+        self.push_uint(val as u64, 3);
+    }
+
+    fn push_u2(&mut self, val: u8) {
+        assert!(val < 4);
+        self.push_uint(val as u64, 2);
+    }
+
+    fn push_u1(&mut self, val: u8) {
+        assert!(val < 2);
+        self.push_uint(val as u64, 1);
+    }
+
+    // Push a context encoding opcode
+    fn push_op(&mut self, op: CtxOp) {
+        self.push_u4(op as u8);
+    }
+
+    // Read a uint value at a given bit index
+    // The bit index is incremented after the value is read
+    fn read_uint(&self, bit_idx: &mut usize, mut num_bits: usize) -> u64 {
+        let start_bit_idx = *bit_idx;
+        let mut cur_idx = *bit_idx;
+
+        // Read the bits in the first byte
+        let bit_mod = cur_idx % 8;
+        let bits_in_byte = self.bytes[cur_idx / 8] >> bit_mod;
+
+        let num_bits_in_byte = std::cmp::min(num_bits, 8 - bit_mod);
+        cur_idx += num_bits_in_byte;
+        num_bits -= num_bits_in_byte;
+
+        let mut out_bits = (bits_in_byte as u64) & ((1 << num_bits_in_byte) - 1);
+
+        // While we have bits left to read
+        while num_bits > 0 {
+            let num_bits_in_byte = std::cmp::min(num_bits, 8);
+            assert!(cur_idx % 8 == 0);
+            let byte = self.bytes[cur_idx / 8] as u64;
+
+            let bits_in_byte = byte & ((1 << num_bits) - 1);
+            out_bits |= bits_in_byte << (cur_idx - start_bit_idx);
+
+            // Move to the next byte/offset
+            cur_idx += num_bits_in_byte;
+            num_bits -= num_bits_in_byte;
+        }
+
+        // Update the read index
+        *bit_idx = cur_idx;
+
+        out_bits
+    }
+
+    fn read_u8(&self, bit_idx: &mut usize) -> u8 {
+        self.read_uint(bit_idx, 8) as u8
+    }
+
+    fn read_u4(&self, bit_idx: &mut usize) -> u8 {
+        self.read_uint(bit_idx, 4) as u8
+    }
+
+    fn read_u3(&self, bit_idx: &mut usize) -> u8 {
+        self.read_uint(bit_idx, 3) as u8
+    }
+
+    fn read_u2(&self, bit_idx: &mut usize) -> u8 {
+        self.read_uint(bit_idx, 2) as u8
+    }
+
+    fn read_u1(&self, bit_idx: &mut usize) -> u8 {
+        self.read_uint(bit_idx, 1) as u8
+    }
+
+    fn read_op(&self, bit_idx: &mut usize) -> CtxOp {
+        unsafe { std::mem::transmute(self.read_u4(bit_idx)) }
+    }
+}
+
+impl fmt::Debug for BitVector {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // We print the higher bytes first
+        for (idx, byte) in self.bytes.iter().enumerate().rev() {
+            write!(f, "{:08b}", byte)?;
+
+            // Insert a separator between each byte
+            if idx > 0 {
+                write!(f, "|")?;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod bitvector_tests {
+    use super::*;
+
+    #[test]
+    fn write_3() {
+        let mut arr = BitVector::new();
+        arr.push_uint(3, 2);
+        assert!(arr.read_uint(&mut 0, 2) == 3);
+    }
+
+    #[test]
+    fn write_11() {
+        let mut arr = BitVector::new();
+        arr.push_uint(1, 1);
+        arr.push_uint(1, 1);
+        assert!(arr.read_uint(&mut 0, 2) == 3);
+    }
+
+    #[test]
+    fn write_11_overlap() {
+        let mut arr = BitVector::new();
+        arr.push_uint(0, 7);
+        arr.push_uint(3, 2);
+        arr.push_uint(1, 1);
+
+        //dbg!(arr.read_uint(7, 2));
+        assert!(arr.read_uint(&mut 7, 2) == 3);
+    }
+
+    #[test]
+    fn write_ff_0() {
+        let mut arr = BitVector::new();
+        arr.push_uint(0xFF, 8);
+        assert!(arr.read_uint(&mut 0, 8) == 0xFF);
+    }
+
+    #[test]
+    fn write_ff_3() {
+        // Write 0xFF at bit index 3
+        let mut arr = BitVector::new();
+        arr.push_uint(0, 3);
+        arr.push_uint(0xFF, 8);
+        assert!(arr.read_uint(&mut 3, 8) == 0xFF);
+    }
+
+    #[test]
+    fn write_ff_sandwich() {
+        // Write 0xFF sandwiched between zeros
+        let mut arr = BitVector::new();
+        arr.push_uint(0, 3);
+        arr.push_u8(0xFF);
+        arr.push_uint(0, 3);
+        assert!(arr.read_uint(&mut 3, 8) == 0xFF);
+    }
+
+    #[test]
+    fn write_read_u32_max() {
+        let mut arr = BitVector::new();
+        arr.push_uint(0xFF_FF_FF_FF, 32);
+        assert!(arr.read_uint(&mut 0, 32) == 0xFF_FF_FF_FF);
+    }
+
+    #[test]
+    fn write_read_u32_max_64b() {
+        let mut arr = BitVector::new();
+        arr.push_uint(0xFF_FF_FF_FF, 64);
+        assert!(arr.read_uint(&mut 0, 64) == 0xFF_FF_FF_FF);
+    }
+
+    #[test]
+    fn write_read_u64_max() {
+        let mut arr = BitVector::new();
+        arr.push_uint(u64::MAX, 64);
+        assert!(arr.read_uint(&mut 0, 64) == u64::MAX);
+    }
+
+    #[test]
+    fn encode_default() {
+        let mut bits = BitVector::new();
+        let ctx = Context::default();
+        let start_idx = ctx.encode_into(&mut bits);
+        assert!(start_idx == 0);
+        assert!(bits.num_bits() > 0);
+        assert!(bits.num_bytes() > 0);
+
+        // Make sure that the round trip matches the input
+        let ctx2 = Context::decode_from(&bits, 0);
+        assert!(ctx2 == ctx);
+    }
+
+    #[test]
+    fn encode_default_2x() {
+        let mut bits = BitVector::new();
+
+        let ctx0 = Context::default();
+        let idx0 = ctx0.encode_into(&mut bits);
+
+        let mut ctx1 = Context::default();
+        ctx1.reg_temps = RegTemps(1);
+        let idx1 = ctx1.encode_into(&mut bits);
+
+        // Make sure that we can encode two contexts successively
+        let ctx0_dec = Context::decode_from(&bits, idx0);
+        let ctx1_dec = Context::decode_from(&bits, idx1);
+        assert!(ctx0_dec == ctx0);
+        assert!(ctx1_dec == ctx1);
+    }
+
+    #[test]
+    fn regress_reg_temps() {
+        let mut bits = BitVector::new();
+        let mut ctx = Context::default();
+        ctx.reg_temps = RegTemps(1);
+        ctx.encode_into(&mut bits);
+
+        let b0 = bits.read_u1(&mut 0);
+        assert!(b0 == 1);
+
+        // Make sure that the round trip matches the input
+        let ctx2 = Context::decode_from(&bits, 0);
+        assert!(ctx2 == ctx);
+    }
+}
+
+// Context encoding opcodes (4 bits)
+#[derive(Debug, Copy, Clone)]
+#[repr(u8)]
+enum CtxOp {
+    // Self type (4 bits)
+    SetSelfType = 0,
+
+    // Local idx (3 bits), temp type (4 bits)
+    SetLocalType,
+
+    // Map stack temp to self with known type
+    // Temp idx (3 bits), known type (4 bits)
+    SetTempType,
+
+    // Map stack temp to a local variable
+    // Temp idx (3 bits), local idx (3 bits)
+    MapTempLocal,
+
+    // Map a stack temp to self
+    // Temp idx (3 bits)
+    MapTempSelf,
+
+    // Set inline block pointer (8 bytes)
+    SetInlineBlock,
+
+    // End of encoding
+    EndOfCode,
+}
+
+// Cache of the last context encoded
+// Empirically this saves a few percent of memory
+// We can experiment with varying the size of this cache
+static mut LAST_CTX_ENCODED: Option<(Context, u32)> = None;
+
+impl Context {
+    pub fn encode(&self) -> u32 {
+        incr_counter!(num_contexts_encoded);
+
+        if *self == Context::default() {
+            return 0;
+        }
+
+        /*
+        // If this context was previously decoded and was not changed since
+        if self.decoded_from != 0 && Self::decode(self.decoded_from) == *self {
+            return self.decoded_from;
+        }
+        */
+
+        // If this context was recently encoded (cache check)
+        unsafe {
+            if let Some((ctx, idx)) = LAST_CTX_ENCODED {
+                if ctx == *self {
+                    return idx;
+                }
+            }
+        }
+
+        let context_data = CodegenGlobals::get_context_data();
+
+        // Offset 0 is reserved for the default context
+        if context_data.num_bits() == 0 {
+            context_data.push_u1(0);
+        }
+
+        let idx = self.encode_into(context_data);
+        let idx: u32 = idx.try_into().unwrap();
+
+        unsafe {
+            LAST_CTX_ENCODED = Some((*self, idx));
+        }
+
+        // In debug mode, check that the round-trip decoding always matches
+        debug_assert!(Self::decode(idx) == *self);
+
+        idx
+    }
+
+    pub fn decode(start_idx: u32) -> Context {
+        if start_idx == 0 {
+            return Context::default();
+        };
+
+        let context_data = CodegenGlobals::get_context_data();
+        let ctx = Self::decode_from(context_data, start_idx as usize);
+
+        // Keep track of the fact that this context was previously encoded
+        //ctx.decoded_from = start_idx;
+
+        ctx
+    }
+
+    // Encode into a compressed context representation in a bit vector
+    fn encode_into(&self, bits: &mut BitVector) -> usize {
+        let start_idx = bits.num_bits();
+
+        // NOTE: this value is often zero or falls within
+        // a small range, so could be compressed
+        //println!("stack_size={}", self.stack_size);
+        //println!("sp_offset={}", self.sp_offset);
+        //println!("chain_depth_and_flags={}", self.chain_depth_and_flags);
+
+        // Most of the time, the stack size is small and sp offset has the same value
+        if (self.stack_size as i64) == (self.sp_offset as i64) && self.stack_size < 4 {
+            // One single bit to signify a compact stack_size/sp_offset encoding
+            bits.push_u1(1);
+            bits.push_u2(self.stack_size);
+        } else {
+            // Full stack size encoding
+            bits.push_u1(0);
+
+            // Number of values currently on the temporary stack
+            bits.push_u8(self.stack_size);
+
+            // sp_offset: i8,
+            bits.push_u8(self.sp_offset as u8);
+        }
+
+        // Bitmap of which stack temps are in a register
+        let RegTemps(reg_temps) = self.reg_temps;
+        bits.push_u8(reg_temps);
+
+        // chain_depth_and_flags: u8,
+        bits.push_u8(self.chain_depth_and_flags);
+
+        // Encode the self type if known
+        if self.self_type != Type::Unknown {
+            bits.push_op(CtxOp::SetSelfType);
+            bits.push_u4(self.self_type as u8);
+        }
+
+        // Encode the local types if known
+        for local_idx in 0..MAX_LOCAL_TYPES {
+            let t = self.get_local_type(local_idx);
+            if t != Type::Unknown {
+                bits.push_op(CtxOp::SetLocalType);
+                bits.push_u3(local_idx as u8);
+                bits.push_u4(t as u8);
+            }
+        }
+
+        // Encode stack temps
+        for stack_idx in 0..MAX_TEMP_TYPES {
+            let mapping = self.get_temp_mapping(stack_idx);
+
+            match mapping.get_kind() {
+                MapToStack => {
+                    let t = mapping.get_type();
+                    if t != Type::Unknown {
+                        // Temp idx (3 bits), known type (4 bits)
+                        bits.push_op(CtxOp::SetTempType);
+                        bits.push_u3(stack_idx as u8);
+                        bits.push_u4(t as u8);
+                    }
+                }
+
+                MapToLocal => {
+                    // Temp idx (3 bits), local idx (3 bits)
+                    let local_idx = mapping.get_local_idx();
+                    bits.push_op(CtxOp::MapTempLocal);
+                    bits.push_u3(stack_idx as u8);
+                    bits.push_u3(local_idx as u8);
+                }
+
+                MapToSelf => {
+                    // Temp idx (3 bits)
+                    bits.push_op(CtxOp::MapTempSelf);
+                    bits.push_u3(stack_idx as u8);
+                }
+            }
+        }
+
+        // Inline block pointer
+        if self.inline_block != 0 {
+            bits.push_op(CtxOp::SetInlineBlock);
+            bits.push_uint(self.inline_block, 64);
+        }
+
+        // TODO: should we add an op for end-of-encoding,
+        // or store num ops at the beginning?
+        bits.push_op(CtxOp::EndOfCode);
+
+        start_idx
+    }
+
+    // Decode a compressed context representation from a bit vector
+    fn decode_from(bits: &BitVector, start_idx: usize) -> Context {
+        let mut ctx = Context::default();
+
+        let mut idx = start_idx;
+
+        // Small vs large stack size encoding
+        if bits.read_u1(&mut idx) == 1 {
+            ctx.stack_size = bits.read_u2(&mut idx);
+            ctx.sp_offset = ctx.stack_size as i8;
+        } else {
+            ctx.stack_size = bits.read_u8(&mut idx);
+            ctx.sp_offset = bits.read_u8(&mut idx) as i8;
+        }
+
+        // Bitmap of which stack temps are in a register
+        ctx.reg_temps = RegTemps(bits.read_u8(&mut idx));
+
+        // chain_depth_and_flags: u8
+        ctx.chain_depth_and_flags = bits.read_u8(&mut idx);
+
+        loop {
+            //println!("reading op");
+            let op = bits.read_op(&mut idx);
+            //println!("got op {:?}", op);
+
+            match op {
+                CtxOp::SetSelfType => {
+                    ctx.self_type = unsafe { transmute(bits.read_u4(&mut idx)) };
+                }
+
+                CtxOp::SetLocalType => {
+                    let local_idx = bits.read_u3(&mut idx) as usize;
+                    let t = unsafe { transmute(bits.read_u4(&mut idx)) };
+                    ctx.set_local_type(local_idx, t);
+                }

+                // Map temp to stack (known type)
+                CtxOp::SetTempType => {
+                    let temp_idx = bits.read_u3(&mut idx) as usize;
+                    let t = unsafe { transmute(bits.read_u4(&mut idx)) };
+                    ctx.set_temp_mapping(temp_idx, TempMapping::map_to_stack(t));
+                }
+
+                // Map temp to local
+                CtxOp::MapTempLocal => {
+                    let temp_idx = bits.read_u3(&mut idx) as usize;
+                    let local_idx = bits.read_u3(&mut idx);
+                    ctx.set_temp_mapping(temp_idx, TempMapping::map_to_local(local_idx));
+                }
+
+                // Map temp to self
+                CtxOp::MapTempSelf => {
+                    let temp_idx = bits.read_u3(&mut idx) as usize;
+                    ctx.set_temp_mapping(temp_idx, TempMapping::map_to_self());
+                }
+
+                // Inline block pointer
+                CtxOp::SetInlineBlock => {
+                    ctx.inline_block = bits.read_uint(&mut idx, 64);
+                }
+
+                CtxOp::EndOfCode => break,
+            }
+        }
+
+        ctx
+    }
+}
+
 /// Tuple of (iseq, idx) used to identify basic blocks
 /// There are a lot of blockid objects so we try to keep the size small.
 #[derive(Copy, Clone, PartialEq, Eq, Debug)]
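From encode_into() above you can tally the exact bit budget of a typical context. A worked example, assuming a small stack (stack_size == sp_offset < 4), no known types, and no inline block (the helper function here is illustrative, not part of the commit):

// Worked example of the encoding cost, following encode_into() above:
//
//   1 bit   compact stack_size/sp_offset flag (1)
//   2 bits  stack_size
//   8 bits  reg_temps bitmap
//   8 bits  chain_depth_and_flags
//   4 bits  CtxOp::EndOfCode
//  -------
//  23 bits  -> 3 bytes, vs. 23 bytes for the old #[repr(packed)] struct
//
// Each SetTempType op adds another 4 + 3 + 4 = 11 bits, and the worst
// case (SetInlineBlock) adds 4 + 64 bits, so only unusual contexts
// approach the old fixed size.
fn encoded_bits_for_simple_ctx() -> usize {
    1 + 2 + 8 + 8 + 4
}

fn main() {
    assert_eq!(encoded_bits_for_simple_ctx(), 23);
    assert_eq!((23 + 7) / 8, 3); // rounded up to whole bytes
}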
@@ -659,7 +1226,7 @@ impl BranchTarget {
         }
     }

-    fn get_ctx(&self) -> Context {
+    fn get_ctx(&self) -> u32 {
         match self {
             BranchTarget::Stub(stub) => stub.ctx,
             BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx,
@@ -686,7 +1253,7 @@ struct BranchStub {
     address: Option<CodePtr>,
     iseq: Cell<IseqPtr>,
     iseq_idx: IseqIdx,
-    ctx: Context,
+    ctx: u32,
 }

 /// Store info about an outgoing branch in a code segment
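These two hunks show the structural payoff: BranchStub (and Block below) now stores a 4-byte u32 offset into the global bit vector instead of an inline Context. A sketch of the size difference, with simplified stand-in types rather than the real structs:

// The 23-byte figure matches the context_size test removed later in
// this diff; the stand-in types below are invented for illustration.
use std::mem::size_of;

#[allow(dead_code)]
#[repr(packed)]
struct InlineCtx {
    bytes: [u8; 23], // stand-in for the old packed Context
}

#[allow(dead_code)]
struct OldStub { ctx: InlineCtx }
#[allow(dead_code)]
struct NewStub { ctx: u32 } // offset into CodegenGlobals::get_context_data()

fn main() {
    assert_eq!(size_of::<OldStub>(), 23);
    assert_eq!(size_of::<NewStub>(), 4);
    // The trade-off: every use of the context now costs a decode pass,
    // which is why encode() keeps a one-entry cache and why offset 0 is
    // reserved as a fast path for the default context.
}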
@@ -808,6 +1375,9 @@ impl PendingBranch {
             return Some(block.start_addr);
         }

+        // Compress/encode the context
+        let ctx = Context::encode(ctx);
+
         // The branch struct is uninitialized right now but as a stable address.
         // We make sure the stub runs after the branch is initialized.
         let branch_struct_addr = self.uninit_branch.as_ptr() as usize;
@@ -819,7 +1389,7 @@ impl PendingBranch {
             address: Some(stub_addr),
             iseq: Cell::new(target.iseq),
             iseq_idx: target.idx,
-            ctx: *ctx,
+            ctx,
         })))));
     }

@@ -912,7 +1482,7 @@ pub struct Block {

     // Context at the start of the block
     // This should never be mutated
-    ctx: Context,
+    ctx: u32,

     // Positions where the generated code starts and ends
     start_addr: CodePtr,
@@ -1085,15 +1655,6 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
     unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
 }

-/// Iterate over all ISEQ payloads
-pub fn for_each_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
-    for_each_iseq(|iseq| {
-        if let Some(iseq_payload) = get_iseq_payload(iseq) {
-            callback(iseq_payload);
-        }
-    });
-}
-
 /// Iterate over all on-stack ISEQs
 pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
     unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
@@ -1425,13 +1986,17 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
 fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
     let insn_idx = blockid.idx.as_usize();
     match get_iseq_payload(blockid.iseq) {
+
+        // FIXME: this counting logic is going to be expensive.
+        // We should avoid it if possible
+
         Some(payload) => {
             payload
                 .version_map
                 .get(insn_idx)
                 .map(|versions| {
                     versions.iter().filter(|&&version|
-                        unsafe { version.as_ref() }.ctx.inline() == inlined
+                        Context::decode(unsafe { version.as_ref() }.ctx).inline() == inlined
                     ).count()
                 })
                 .unwrap_or(0)
@@ -1476,10 +2041,11 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
     // For each version matching the blockid
     for blockref in versions.iter() {
         let block = unsafe { blockref.as_ref() };
+        let block_ctx = Context::decode(block.ctx);

         // Note that we always prefer the first matching
         // version found because of inline-cache chains
-        match ctx.diff(&block.ctx) {
+        match ctx.diff(&block_ctx) {
             TypeDiff::Compatible(diff) if diff < best_diff => {
                 best_version = Some(*blockref);
                 best_diff = diff;
@@ -1561,7 +2127,7 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
     let block = unsafe { blockref.as_ref() };

     // Function entry blocks must have stack size 0
-    assert!(!(block.iseq_range.start == 0 && block.ctx.stack_size > 0));
+    debug_assert!(!(block.iseq_range.start == 0 && Context::decode(block.ctx).stack_size > 0));

     let version_list = get_or_create_version_list(block.get_blockid());

@@ -1620,12 +2186,14 @@ impl JITState {

         incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len());

+        let ctx = Context::encode(&self.get_starting_ctx());
+
         // Make the new block
         let block = MaybeUninit::new(Block {
             start_addr,
             iseq: Cell::new(self.get_iseq()),
             iseq_range: self.get_starting_insn_idx()..end_insn_idx,
-            ctx: self.get_starting_ctx(),
+            ctx,
             end_addr: Cell::new(end_addr),
             incoming: MutableBranchList(Cell::default()),
             gc_obj_offsets: gc_obj_offsets.into_boxed_slice(),
@@ -2382,6 +2950,7 @@ fn gen_block_series_body(
     };

     // Generate new block using context from the last branch.
+    let requested_ctx = Context::decode(requested_ctx);
     let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb);

     // If the block failed to compile
@@ -2769,7 +3338,8 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
             return target.get_address().unwrap().raw_ptr(cb);
         }

-        (target.get_blockid(), target.get_ctx())
+        let target_ctx = Context::decode(target.get_ctx());
+        (target.get_blockid(), target_ctx)
     };

     let (cfp, original_interp_sp) = unsafe {
@@ -2906,7 +3476,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
 /// Generate a "stub", a piece of code that calls the compiler back when run.
 /// A piece of code that redeems for more code; a thunk for code.
 fn gen_branch_stub(
-    ctx: &Context,
+    ctx: u32,
     ocb: &mut OutlinedCb,
     branch_struct_address: usize,
     target_idx: u32,
@@ -2914,8 +3484,8 @@ fn gen_branch_stub(
     let ocb = ocb.unwrap();

     let mut asm = Assembler::new();
-    asm.ctx = *ctx;
-    asm.set_reg_temps(ctx.reg_temps);
+    asm.ctx = Context::decode(ctx);
+    asm.set_reg_temps(asm.ctx.reg_temps);
     asm_comment!(asm, "branch stub hit");

     if asm.ctx.is_return_landing() {
@@ -3112,7 +3682,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
         // compile the target block right after this one (fallthrough).
         BranchTarget::Stub(Box::new(BranchStub {
             address: None,
-            ctx: *ctx,
+            ctx: Context::encode(ctx),
             iseq: Cell::new(target0.iseq),
             iseq_idx: target0.idx,
         }))
@@ -3364,7 +3934,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
         }

         // Create a stub for this branch target
-        let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
+        let stub_addr = gen_branch_stub(block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);

         // In case we were unable to generate a stub (e.g. OOM). Use the block's
         // exit instead of a stub for the block. It's important that we
@@ -3546,11 +4116,6 @@ mod tests {
         assert_eq!(t.get_local_idx(), 7);
     }

-    #[test]
-    fn context_size() {
-        assert_eq!(mem::size_of::<Context>(), 23);
-    }
-
     #[test]
     fn types() {
         // Valid src => dst
@@ -3695,7 +4260,7 @@ mod tests {
                 iseq: Cell::new(ptr::null()),
                 iseq_idx: 0,
                 address: None,
-                ctx: Context::default(),
+                ctx: 0,
             })))))]
         };
         // For easier soundness reasoning, make sure the reference returned does not out live the
@@ -3728,7 +4293,7 @@ mod tests {
             iseq: Cell::new(ptr::null()),
             iseq_idx: 0,
             address: None,
-            ctx: Context::default(),
+            ctx: 0,
         })))));
         // Invalid ISeq; we never dereference it.
         let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr();
yjit/src/stats.rs

@@ -10,8 +10,6 @@ use std::time::Instant;
 use std::collections::HashMap;

 use crate::codegen::CodegenGlobals;
-use crate::core::Context;
-use crate::core::for_each_iseq_payload;
 use crate::cruby::*;
 use crate::options::*;
 use crate::yjit::yjit_enabled_p;
@@ -557,6 +555,7 @@ make_counters! {
     branch_insn_count,
     branch_known_count,
     max_inline_versions,
+    num_contexts_encoded,

     freed_iseq_count,

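The new num_contexts_encoded counter pairs with context_data_bytes to produce the bytes_per_context figure printed by yjit.rb above. A sketch of the arithmetic, with made-up numbers purely for illustration:

// num_contexts_encoded counts calls to Context::encode(), and
// context_data_bytes is the size of the shared bit vector.
fn main() {
    let context_data_bytes: usize = 150_000;   // invented value
    let num_contexts_encoded: usize = 60_000;  // invented value
    let bytes_per_context = context_data_bytes as f64 / num_contexts_encoded as f64;
    // 2.5 bytes per encoded context here, versus 23 bytes for the old
    // fixed-size struct; the one-entry cache and the reserved default
    // offset can push the average below the raw per-context bit cost.
    println!("bytes_per_context: {:.2}", bytes_per_context);
}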
@@ -641,8 +640,8 @@ pub extern "C" fn rb_yjit_print_stats_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE
 /// Primitive called in yjit.rb.
 /// Export all YJIT statistics as a Ruby hash.
 #[no_mangle]
-pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, context: VALUE) -> VALUE {
-    with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(context == Qtrue))
+pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+    with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict())
 }

 /// Primitive called in yjit.rb
@@ -701,7 +700,7 @@ pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char
 }

 /// Export all YJIT statistics as a Ruby hash.
-fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
+fn rb_yjit_gen_stats_dict() -> VALUE {
     // If YJIT is not enabled, return Qnil
     if !yjit_enabled_p() {
         return Qnil;
@@ -744,14 +743,9 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
     // Rust global allocations in bytes
     hash_aset_usize!(hash, "yjit_alloc_size", GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst));

-    // `context` is true at RubyVM::YJIT._print_stats for --yjit-stats. It's false by default
-    // for RubyVM::YJIT.runtime_stats because counting all Contexts could be expensive.
-    if context {
-        let live_context_count = get_live_context_count();
-        let context_size = std::mem::size_of::<Context>();
-        hash_aset_usize!(hash, "live_context_count", live_context_count);
-        hash_aset_usize!(hash, "live_context_size", live_context_count * context_size);
-    }
+    // How many bytes we are using to store context data
+    let context_data = CodegenGlobals::get_context_data();
+    hash_aset_usize!(hash, "context_data_bytes", context_data.num_bytes());

     // VM instructions count
     hash_aset_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize);
@@ -846,21 +840,6 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
     hash
 }

-fn get_live_context_count() -> usize {
-    let mut count = 0;
-    for_each_iseq_payload(|iseq_payload| {
-        for blocks in iseq_payload.version_map.iter() {
-            for block in blocks.iter() {
-                count += unsafe { block.as_ref() }.get_ctx_count();
-            }
-        }
-        for block in iseq_payload.dead_blocks.iter() {
-            count += unsafe { block.as_ref() }.get_ctx_count();
-        }
-    });
-    count
-}
-
 /// Record the backtrace when a YJIT exit occurs. This functionality requires
 /// that the stats feature is enabled as well as the --yjit-trace-exits option.
 ///