YJIT: Allow inlining ISEQ calls with a block (#9622)
* YJIT: Allow inlining ISEQ calls with a block
* Leave a TODO comment about u16 inline_block
This commit is contained in:
parent
557b69e83b
commit
27c1dd8634
7
benchmark/loop_times_megamorphic.yml
Normal file
7
benchmark/loop_times_megamorphic.yml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
prelude: |
|
||||||
|
eval(<<~EOS)
|
||||||
|
def loop_times_megamorphic
|
||||||
|
#{"1.times {|i|};" * 1000}
|
||||||
|
end
|
||||||
|
EOS
|
||||||
|
benchmark: loop_times_megamorphic
|
@ -8637,6 +8637,9 @@ compile_builtin_attr(rb_iseq_t *iseq, const NODE *node)
|
|||||||
if (strcmp(RSTRING_PTR(string), "leaf") == 0) {
|
if (strcmp(RSTRING_PTR(string), "leaf") == 0) {
|
||||||
ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_LEAF;
|
ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_LEAF;
|
||||||
}
|
}
|
||||||
|
else if (strcmp(RSTRING_PTR(string), "inline_block") == 0) {
|
||||||
|
ISEQ_BODY(iseq)->builtin_attrs |= BUILTIN_ATTR_INLINE_BLOCK;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
goto unknown_arg;
|
goto unknown_arg;
|
||||||
}
|
}
|
||||||
|
@ -87,6 +87,7 @@ module Kernel
|
|||||||
#++
|
#++
|
||||||
#
|
#
|
||||||
def tap
|
def tap
|
||||||
|
Primitive.attr! :inline_block
|
||||||
yield(self)
|
yield(self)
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
@ -127,6 +128,7 @@ module Kernel
|
|||||||
# then {|response| JSON.parse(response) }
|
# then {|response| JSON.parse(response) }
|
||||||
#
|
#
|
||||||
def then
|
def then
|
||||||
|
Primitive.attr! :inline_block
|
||||||
unless block_given?
|
unless block_given?
|
||||||
return Primitive.cexpr! 'SIZED_ENUMERATOR(self, 0, 0, rb_obj_size)'
|
return Primitive.cexpr! 'SIZED_ENUMERATOR(self, 0, 0, rb_obj_size)'
|
||||||
end
|
end
|
||||||
@ -142,6 +144,7 @@ module Kernel
|
|||||||
# "my string".yield_self {|s| s.upcase } #=> "MY STRING"
|
# "my string".yield_self {|s| s.upcase } #=> "MY STRING"
|
||||||
#
|
#
|
||||||
def yield_self
|
def yield_self
|
||||||
|
Primitive.attr! :inline_block
|
||||||
unless block_given?
|
unless block_given?
|
||||||
return Primitive.cexpr! 'SIZED_ENUMERATOR(self, 0, 0, rb_obj_size)'
|
return Primitive.cexpr! 'SIZED_ENUMERATOR(self, 0, 0, rb_obj_size)'
|
||||||
end
|
end
|
||||||
@ -178,6 +181,7 @@ module Kernel
|
|||||||
# puts enum.next
|
# puts enum.next
|
||||||
# } #=> :ok
|
# } #=> :ok
|
||||||
def loop
|
def loop
|
||||||
|
Primitive.attr! :inline_block
|
||||||
unless block_given?
|
unless block_given?
|
||||||
return enum_for(:loop) { Float::INFINITY }
|
return enum_for(:loop) { Float::INFINITY }
|
||||||
end
|
end
|
||||||
|
@ -229,6 +229,7 @@ class Integer
|
|||||||
#
|
#
|
||||||
# With no block given, returns an Enumerator.
|
# With no block given, returns an Enumerator.
|
||||||
def times
|
def times
|
||||||
|
Primitive.attr! :inline_block
|
||||||
unless block_given?
|
unless block_given?
|
||||||
return to_enum(:times) { self < 0 ? 0 : self }
|
return to_enum(:times) { self < 0 ? 0 : self }
|
||||||
end
|
end
|
||||||
|
@ -6,7 +6,7 @@ require_relative 'ruby_vm/helpers/c_escape'
|
|||||||
|
|
||||||
SUBLIBS = {}
|
SUBLIBS = {}
|
||||||
REQUIRED = {}
|
REQUIRED = {}
|
||||||
BUILTIN_ATTRS = %w[leaf]
|
BUILTIN_ATTRS = %w[leaf inline_block]
|
||||||
|
|
||||||
def string_literal(lit, str = [])
|
def string_literal(lit, str = [])
|
||||||
while lit
|
while lit
|
||||||
|
@ -368,6 +368,8 @@ enum rb_builtin_attr {
|
|||||||
BUILTIN_ATTR_LEAF = 0x01,
|
BUILTIN_ATTR_LEAF = 0x01,
|
||||||
// This iseq only contains single `opt_invokebuiltin_delegate_leave` instruction with 0 arguments.
|
// This iseq only contains single `opt_invokebuiltin_delegate_leave` instruction with 0 arguments.
|
||||||
BUILTIN_ATTR_SINGLE_NOARG_LEAF = 0x02,
|
BUILTIN_ATTR_SINGLE_NOARG_LEAF = 0x02,
|
||||||
|
// This attribute signals JIT to duplicate the iseq for each block iseq so that its `yield` will be monomorphic.
|
||||||
|
BUILTIN_ATTR_INLINE_BLOCK = 0x04,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef VALUE (*rb_jit_func_t)(struct rb_execution_context_struct *, struct rb_control_frame_struct *);
|
typedef VALUE (*rb_jit_func_t)(struct rb_execution_context_struct *, struct rb_control_frame_struct *);
|
||||||
|
1
yjit.rb
1
yjit.rb
@ -345,6 +345,7 @@ module RubyVM::YJIT
|
|||||||
if stats[:compiled_blockid_count] != 0
|
if stats[:compiled_blockid_count] != 0
|
||||||
out.puts "versions_per_block: " + format_number(13, "%4.3f" % (stats[:compiled_block_count].fdiv(stats[:compiled_blockid_count])))
|
out.puts "versions_per_block: " + format_number(13, "%4.3f" % (stats[:compiled_block_count].fdiv(stats[:compiled_blockid_count])))
|
||||||
end
|
end
|
||||||
|
out.puts "max_inline_versions: " + format_number(13, stats[:max_inline_versions])
|
||||||
out.puts "compiled_branch_count: " + format_number(13, stats[:compiled_branch_count])
|
out.puts "compiled_branch_count: " + format_number(13, stats[:compiled_branch_count])
|
||||||
out.puts "compile_time_ms: " + format_number(13, stats[:compile_time_ns] / (1000 * 1000))
|
out.puts "compile_time_ms: " + format_number(13, stats[:compile_time_ns] / (1000 * 1000))
|
||||||
out.puts "block_next_count: " + format_number(13, stats[:block_next_count])
|
out.puts "block_next_count: " + format_number(13, stats[:block_next_count])
|
||||||
|
@ -1810,7 +1810,7 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
|
|||||||
|
|
||||||
// Gets the EP of the ISeq of the containing method, or "local level".
|
// Gets the EP of the ISeq of the containing method, or "local level".
|
||||||
// Equivalent of GET_LEP() macro.
|
// Equivalent of GET_LEP() macro.
|
||||||
fn gen_get_lep(jit: &mut JITState, asm: &mut Assembler) -> Opnd {
|
fn gen_get_lep(jit: &JITState, asm: &mut Assembler) -> Opnd {
|
||||||
// Equivalent of get_lvar_level() in compile.c
|
// Equivalent of get_lvar_level() in compile.c
|
||||||
fn get_lvar_level(iseq: IseqPtr) -> u32 {
|
fn get_lvar_level(iseq: IseqPtr) -> u32 {
|
||||||
if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } {
|
if iseq == unsafe { rb_get_iseq_body_local_iseq(iseq) } {
|
||||||
@ -6910,6 +6910,12 @@ fn gen_send_iseq(
|
|||||||
// Create a context for the callee
|
// Create a context for the callee
|
||||||
let mut callee_ctx = Context::default();
|
let mut callee_ctx = Context::default();
|
||||||
|
|
||||||
|
// If the callee has :inline_block annotation and the callsite has a block ISEQ,
|
||||||
|
// duplicate a callee block for each block ISEQ to make its `yield` monomorphic.
|
||||||
|
if let (Some(BlockHandler::BlockISeq(iseq)), true) = (block, builtin_attrs & BUILTIN_ATTR_INLINE_BLOCK != 0) {
|
||||||
|
callee_ctx.set_inline_block(iseq);
|
||||||
|
}
|
||||||
|
|
||||||
// Set the argument types in the callee's context
|
// Set the argument types in the callee's context
|
||||||
for arg_idx in 0..argc {
|
for arg_idx in 0..argc {
|
||||||
let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap();
|
let stack_offs: u8 = (argc - arg_idx - 1).try_into().unwrap();
|
||||||
@ -7904,6 +7910,13 @@ fn gen_invokeblock_specialized(
|
|||||||
Counter::guard_invokeblock_tag_changed,
|
Counter::guard_invokeblock_tag_changed,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// If the current ISEQ is annotated to be inlined but it's not being inlined here,
|
||||||
|
// generate a dynamic dispatch to avoid making this yield megamorphic.
|
||||||
|
if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() {
|
||||||
|
gen_counter_incr(asm, Counter::invokeblock_iseq_not_inlined);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() };
|
let comptime_captured = unsafe { ((comptime_handler.0 & !0x3) as *const rb_captured_block).as_ref().unwrap() };
|
||||||
let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() };
|
let comptime_iseq = unsafe { *comptime_captured.code.iseq.as_ref() };
|
||||||
|
|
||||||
|
@ -480,6 +480,13 @@ pub struct Context {
|
|||||||
// Stack slot type/local_idx we track
|
// Stack slot type/local_idx we track
|
||||||
// 8 temp types * 4 bits, total 32 bits
|
// 8 temp types * 4 bits, total 32 bits
|
||||||
temp_payload: u32,
|
temp_payload: u32,
|
||||||
|
|
||||||
|
/// A pointer to a block ISEQ supplied by the caller. 0 if not inlined.
|
||||||
|
/// Not using IseqPtr to satisfy Default trait, and not using Option for #[repr(packed)]
|
||||||
|
/// TODO: This could be u16 if we have a global or per-ISEQ HashMap to convert IseqPtr
|
||||||
|
/// to serial indexes. We're thinking of overhauling Context structure in Ruby 3.4 which
|
||||||
|
/// could allow this to consume no bytes, so we're leaving this as is.
|
||||||
|
inline_block: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tuple of (iseq, idx) used to identify basic blocks
|
/// Tuple of (iseq, idx) used to identify basic blocks
|
||||||
@ -1400,14 +1407,19 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Count the number of block versions matching a given blockid
|
/// Count the number of block versions matching a given blockid
|
||||||
fn get_num_versions(blockid: BlockId) -> usize {
|
/// `inlined: true` counts inlined versions, and `inlined: false` counts other versions.
|
||||||
|
fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
|
||||||
let insn_idx = blockid.idx.as_usize();
|
let insn_idx = blockid.idx.as_usize();
|
||||||
match get_iseq_payload(blockid.iseq) {
|
match get_iseq_payload(blockid.iseq) {
|
||||||
Some(payload) => {
|
Some(payload) => {
|
||||||
payload
|
payload
|
||||||
.version_map
|
.version_map
|
||||||
.get(insn_idx)
|
.get(insn_idx)
|
||||||
.map(|versions| versions.len())
|
.map(|versions| {
|
||||||
|
versions.iter().filter(|&&version|
|
||||||
|
unsafe { version.as_ref() }.ctx.inline() == inlined
|
||||||
|
).count()
|
||||||
|
})
|
||||||
.unwrap_or(0)
|
.unwrap_or(0)
|
||||||
}
|
}
|
||||||
None => 0,
|
None => 0,
|
||||||
@ -1465,6 +1477,9 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
|
|||||||
return best_version;
|
return best_version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Allow inlining a Block up to MAX_INLINE_VERSIONS times.
|
||||||
|
const MAX_INLINE_VERSIONS: usize = 1000;
|
||||||
|
|
||||||
/// Produce a generic context when the block version limit is hit for a blockid
|
/// Produce a generic context when the block version limit is hit for a blockid
|
||||||
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
|
pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
|
||||||
// Guard chains implement limits separately, do nothing
|
// Guard chains implement limits separately, do nothing
|
||||||
@ -1472,21 +1487,39 @@ pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
|
|||||||
return *ctx;
|
return *ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let next_versions = get_num_versions(blockid, ctx.inline()) + 1;
|
||||||
|
let max_versions = if ctx.inline() {
|
||||||
|
MAX_INLINE_VERSIONS
|
||||||
|
} else {
|
||||||
|
get_option!(max_versions)
|
||||||
|
};
|
||||||
|
|
||||||
// If this block version we're about to add will hit the version limit
|
// If this block version we're about to add will hit the version limit
|
||||||
if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
|
if next_versions >= max_versions {
|
||||||
// Produce a generic context that stores no type information,
|
// Produce a generic context that stores no type information,
|
||||||
// but still respects the stack_size and sp_offset constraints.
|
// but still respects the stack_size and sp_offset constraints.
|
||||||
// This new context will then match all future requests.
|
// This new context will then match all future requests.
|
||||||
let generic_ctx = ctx.get_generic_ctx();
|
let generic_ctx = ctx.get_generic_ctx();
|
||||||
|
|
||||||
debug_assert_ne!(
|
if cfg!(debug_assertions) {
|
||||||
|
let mut ctx = ctx.clone();
|
||||||
|
if ctx.inline() {
|
||||||
|
// Suppress TypeDiff::Incompatible from ctx.diff(). We return TypeDiff::Incompatible
|
||||||
|
// to keep inlining blocks until we hit the limit, but it's safe to give up inlining.
|
||||||
|
ctx.inline_block = 0;
|
||||||
|
assert!(generic_ctx.inline_block == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_ne!(
|
||||||
TypeDiff::Incompatible,
|
TypeDiff::Incompatible,
|
||||||
ctx.diff(&generic_ctx),
|
ctx.diff(&generic_ctx),
|
||||||
"should substitute a compatible context",
|
"should substitute a compatible context",
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return generic_ctx;
|
return generic_ctx;
|
||||||
}
|
}
|
||||||
|
incr_counter_to!(max_inline_versions, next_versions);
|
||||||
|
|
||||||
return *ctx;
|
return *ctx;
|
||||||
}
|
}
|
||||||
@ -2020,6 +2053,16 @@ impl Context {
|
|||||||
self.local_types = 0;
|
self.local_types = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return true if the code is inlined by the caller
|
||||||
|
pub fn inline(&self) -> bool {
|
||||||
|
self.inline_block != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set a block ISEQ given to the Block of this Context
|
||||||
|
pub fn set_inline_block(&mut self, iseq: IseqPtr) {
|
||||||
|
self.inline_block = iseq as u64
|
||||||
|
}
|
||||||
|
|
||||||
/// Compute a difference score for two context objects
|
/// Compute a difference score for two context objects
|
||||||
pub fn diff(&self, dst: &Context) -> TypeDiff {
|
pub fn diff(&self, dst: &Context) -> TypeDiff {
|
||||||
// Self is the source context (at the end of the predecessor)
|
// Self is the source context (at the end of the predecessor)
|
||||||
@ -2065,6 +2108,13 @@ impl Context {
|
|||||||
TypeDiff::Incompatible => return TypeDiff::Incompatible,
|
TypeDiff::Incompatible => return TypeDiff::Incompatible,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Check the block to inline
|
||||||
|
if src.inline_block != dst.inline_block {
|
||||||
|
// find_block_version should not find existing blocks with different
|
||||||
|
// inline_block so that their yield will not be megamorphic.
|
||||||
|
return TypeDiff::Incompatible;
|
||||||
|
}
|
||||||
|
|
||||||
// For each local type we track
|
// For each local type we track
|
||||||
for i in 0.. MAX_LOCAL_TYPES {
|
for i in 0.. MAX_LOCAL_TYPES {
|
||||||
let t_src = src.get_local_type(i);
|
let t_src = src.get_local_type(i);
|
||||||
@ -3456,7 +3506,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn context_size() {
|
fn context_size() {
|
||||||
assert_eq!(mem::size_of::<Context>(), 15);
|
assert_eq!(mem::size_of::<Context>(), 23);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -449,6 +449,7 @@ pub struct iseq_inline_cvar_cache_entry {
|
|||||||
}
|
}
|
||||||
pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
|
pub const BUILTIN_ATTR_LEAF: rb_builtin_attr = 1;
|
||||||
pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
|
pub const BUILTIN_ATTR_SINGLE_NOARG_LEAF: rb_builtin_attr = 2;
|
||||||
|
pub const BUILTIN_ATTR_INLINE_BLOCK: rb_builtin_attr = 4;
|
||||||
pub type rb_builtin_attr = u32;
|
pub type rb_builtin_attr = u32;
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
#[derive(Debug, Copy, Clone)]
|
#[derive(Debug, Copy, Clone)]
|
||||||
|
@ -245,7 +245,7 @@ macro_rules! make_counters {
|
|||||||
|
|
||||||
/// The list of counters that are available without --yjit-stats.
|
/// The list of counters that are available without --yjit-stats.
|
||||||
/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
|
/// They are incremented only by `incr_counter!` and don't use `gen_counter_incr`.
|
||||||
pub const DEFAULT_COUNTERS: [Counter; 8] = [
|
pub const DEFAULT_COUNTERS: [Counter; 9] = [
|
||||||
Counter::code_gc_count,
|
Counter::code_gc_count,
|
||||||
Counter::compiled_iseq_entry,
|
Counter::compiled_iseq_entry,
|
||||||
Counter::cold_iseq_entry,
|
Counter::cold_iseq_entry,
|
||||||
@ -254,6 +254,7 @@ pub const DEFAULT_COUNTERS: [Counter; 8] = [
|
|||||||
Counter::compiled_block_count,
|
Counter::compiled_block_count,
|
||||||
Counter::compiled_branch_count,
|
Counter::compiled_branch_count,
|
||||||
Counter::compile_time_ns,
|
Counter::compile_time_ns,
|
||||||
|
Counter::max_inline_versions,
|
||||||
];
|
];
|
||||||
|
|
||||||
/// Macro to increase a counter by name and count
|
/// Macro to increase a counter by name and count
|
||||||
@ -269,6 +270,24 @@ macro_rules! incr_counter_by {
|
|||||||
}
|
}
|
||||||
pub(crate) use incr_counter_by;
|
pub(crate) use incr_counter_by;
|
||||||
|
|
||||||
|
/// Macro to increase a counter if the given value is larger
|
||||||
|
macro_rules! incr_counter_to {
|
||||||
|
// Unsafe is ok here because options are initialized
|
||||||
|
// once before any Ruby code executes
|
||||||
|
($counter_name:ident, $count:expr) => {
|
||||||
|
#[allow(unused_unsafe)]
|
||||||
|
{
|
||||||
|
unsafe {
|
||||||
|
$crate::stats::COUNTERS.$counter_name = u64::max(
|
||||||
|
$crate::stats::COUNTERS.$counter_name,
|
||||||
|
$count as u64,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
pub(crate) use incr_counter_to;
|
||||||
|
|
||||||
/// Macro to increment a counter by name
|
/// Macro to increment a counter by name
|
||||||
macro_rules! incr_counter {
|
macro_rules! incr_counter {
|
||||||
// Unsafe is ok here because options are initialized
|
// Unsafe is ok here because options are initialized
|
||||||
@ -395,6 +414,7 @@ make_counters! {
|
|||||||
invokeblock_iseq_arg0_args_splat,
|
invokeblock_iseq_arg0_args_splat,
|
||||||
invokeblock_iseq_arg0_not_array,
|
invokeblock_iseq_arg0_not_array,
|
||||||
invokeblock_iseq_arg0_wrong_len,
|
invokeblock_iseq_arg0_wrong_len,
|
||||||
|
invokeblock_iseq_not_inlined,
|
||||||
invokeblock_ifunc_args_splat,
|
invokeblock_ifunc_args_splat,
|
||||||
invokeblock_ifunc_kw_splat,
|
invokeblock_ifunc_kw_splat,
|
||||||
invokeblock_proc,
|
invokeblock_proc,
|
||||||
@ -518,6 +538,7 @@ make_counters! {
|
|||||||
defer_empty_count,
|
defer_empty_count,
|
||||||
branch_insn_count,
|
branch_insn_count,
|
||||||
branch_known_count,
|
branch_known_count,
|
||||||
|
max_inline_versions,
|
||||||
|
|
||||||
freed_iseq_count,
|
freed_iseq_count,
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user