YJIT: lazy polymorphic getinstancevariable

Lazily compile out a chain of checks for different known classes and
whether `self` embeds its ivars or not.
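
Concretely, the compiled site becomes a chain of cheap guards (class serial, then embedded-vs-extended ivar storage), where each link is compiled lazily the first time a new receiver shape shows up, and the chain ends in a side exit once a depth limit is reached. Here is a standalone C sketch of that dispatch policy; the names (shape_t, chain_lookup, CHAIN_DEPTH_MAX) are hypothetical, and the real mechanism is jit_chain_guard in the diff below:

    #include <stdbool.h>
    #include <stdio.h>

    // Hypothetical model of one specialized link in a guard chain.
    typedef struct {
        unsigned long class_serial; // guard 1: receiver class
        bool embedded;              // guard 2: ivars stored inline in the object
    } shape_t;

    enum { CHAIN_DEPTH_MAX = 10 }; // cf. GETIVAR_MAX_DEPTH: 5 classes x 2 layouts

    // Return the chain link that accepts this receiver shape, lazily adding
    // one when an unseen shape arrives, or -1 (side exit) past the limit.
    static int chain_lookup(shape_t *chain, int *len, shape_t seen)
    {
        for (int i = 0; i < *len; i++) {
            if (chain[i].class_serial == seen.class_serial &&
                chain[i].embedded == seen.embedded) {
                return i; // an existing guard accepts this shape
            }
        }
        if (*len < CHAIN_DEPTH_MAX) {
            chain[(*len)++] = seen; // compile one more specialized link
            return *len - 1;
        }
        return -1; // megamorphic site: fall back to the interpreter
    }

    int main(void)
    {
        shape_t chain[CHAIN_DEPTH_MAX];
        int len = 0;
        shape_t embedded_foo = { 1, true }, extended_bar = { 2, false };
        printf("%d\n", chain_lookup(chain, &len, embedded_foo)); // 0: new link
        printf("%d\n", chain_lookup(chain, &len, extended_bar)); // 1: new link
        printf("%d\n", chain_lookup(chain, &len, embedded_foo)); // 0: reused
        return 0;
    }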

* Remove trailing whitespaces

* Get proper address in Capstone disassembly

* Lowercase address in Capstone disassembly

Capstone uses lowercase for jump targets in generated listings. Let's
match it.

* Use the same successor in getivar guard chains

Cuts down on duplication

* Address reviews

* Fix copypasta error

* Add a comment
Alan Wu 2021-03-12 12:22:19 -05:00
parent 439db7b81b
commit 5d834bcf9f
9 changed files with 1112 additions and 907 deletions


@@ -301,3 +301,63 @@ assert_equal "good", %q{
foo
}
# Test polymorphic getinstancevariable. T_OBJECT -> T_STRING
assert_equal 'ok', %q{
@hello = @h1 = @h2 = @h3 = @h4 = 'ok'
str = ""
str.instance_variable_set(:@hello, 'ok')
public def get
@hello
end
get
get
str.get
str.get
}
# Test polymorphic getinstancevariable, two different classes
assert_equal 'ok', %q{
class Embedded
def initialize
@ivar = 0
end
def get
@ivar
end
end
class Extended < Embedded
def initialize
@v1 = @v2 = @v3 = @v4 = @ivar = 'ok'
end
end
embed = Embedded.new
extend = Extended.new
embed.get
embed.get
extend.get
extend.get
}
# Test megamorphic getinstancevariable
assert_equal 'ok', %q{
parent = Class.new do
def initialize
@hello = @h1 = @h2 = @h3 = @h4 = 'ok'
end
def get
@hello
end
end
subclasses = 300.times.map { Class.new(parent) }
subclasses.each { _1.new.get }
parent.new.get
}

common.mk (1631 lines changed): file diff suppressed because it is too large


@@ -21,6 +21,5 @@ RubyVM::Instructions = RubyVM::BareInstructions.to_a + \
RubyVM::MicroJIT::ExampleInstructions.to_a
require_relative 'trace_instructions'
RubyVM::Instructions.freeze


@@ -1099,6 +1099,12 @@ iv_index_tbl_lookup(struct st_table *iv_index_tbl, ID id, struct rb_iv_index_tbl
return found ? true : false;
}
bool
rb_iv_index_tbl_lookup(struct st_table *iv_index_tbl, ID id, struct rb_iv_index_tbl_entry **ent)
{
return iv_index_tbl_lookup(iv_index_tbl, id, ent);
}
ALWAYS_INLINE(static void fill_ivar_cache(const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr, struct rb_iv_index_tbl_entry *ent));
static inline void
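
The hunk above wraps the file-local iv_index_tbl_lookup in an exported function so that YJIT, which lives in a separate translation unit, can call it. A minimal sketch of that linkage pattern, with toy names:

    // Internal helper: static gives it internal linkage, so it is
    // invisible to other .c files.
    static int lookup_impl(int key)
    {
        return key * 2;
    }

    // Thin exported wrapper: external linkage, callable from elsewhere
    // (here, from the JIT's codegen file).
    int rb_lookup(int key)
    {
        return lookup_impl(key);
    }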


@@ -14,13 +14,13 @@ module YJIT
# Sort the blocks by increasing addresses
blocks.sort_by(&:address).each_with_index do |block, i|
str << "== BLOCK #{i+1}/#{blocks.length}: #{block.code.length} BYTES, ISEQ RANGE [#{block.iseq_start_index},#{block.iseq_end_index}[ ".ljust(80, "=")
str << "== BLOCK #{i+1}/#{blocks.length}: #{block.code.length} BYTES, ISEQ RANGE [#{block.iseq_start_index},#{block.iseq_end_index}] ".ljust(80, "=")
str << "\n"
cs.disasm(block.code, 0).each do |i|
cs.disasm(block.code, block.address).each do |i|
str << sprintf(
" %<address>08X: %<instruction>s\t%<details>s\n",
address: block.address + i.address,
" %<address>08x: %<instruction>s\t%<details>s\n",
address: i.address,
instruction: i.mnemonic,
details: i.op_str
)
@@ -62,6 +62,7 @@ module YJIT
print_counters(counters, prefix: 'oswb_', prompt: 'opt_send_without_block exit reasons: ')
print_counters(counters, prefix: 'leave_', prompt: 'leave exit reasons: ')
print_counters(counters, prefix: 'getivar_', prompt: 'getinstancevariable exit reasons:')
end
def print_counters(counters, prefix:, prompt:)


@@ -7,6 +7,7 @@
#include "builtin.h"
#include "internal/compile.h"
#include "internal/class.h"
#include "internal/object.h"
#include "insns_info.inc"
#include "yjit.h"
#include "yjit_iface.h"
@@ -99,6 +100,12 @@ jit_peek_at_stack(jitstate_t* jit, ctx_t* ctx)
return *(sp - 1);
}
static VALUE
jit_peek_at_self(jitstate_t *jit, ctx_t *ctx)
{
return jit->ec->cfp->self;
}
// Save YJIT registers prior to a C call
static void
yjit_save_regs(codeblock_t* cb)
@@ -564,102 +571,226 @@ guard_self_is_object(codeblock_t *cb, x86opnd_t self_opnd, uint8_t *side_exit, c
je_ptr(cb, side_exit);
cmp(cb, self_opnd, imm_opnd(Qnil));
je_ptr(cb, side_exit);
// maybe we can do
// RUBY_ASSERT(Qfalse < Qnil);
// cmp(cb, self_opnd, imm_opnd(Qnil));
// jbe(cb, side_exit);
ctx->self_is_object = true;
}
}
static void
gen_jnz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
{
switch (shape)
{
case SHAPE_NEXT0:
case SHAPE_NEXT1:
RUBY_ASSERT(false);
break;
case SHAPE_DEFAULT:
jnz_ptr(cb, target0);
break;
}
}
static void
gen_jz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
{
switch (shape)
{
case SHAPE_NEXT0:
case SHAPE_NEXT1:
RUBY_ASSERT(false);
break;
case SHAPE_DEFAULT:
jz_ptr(cb, target0);
break;
}
}
enum jcc_kinds {
JCC_JNE,
JCC_JNZ,
JCC_JZ,
JCC_JE,
};
// Generate a jump to a stub that recompiles the current YARV instruction on failure.
// When depth_limit is exceeded, generate a jump to a side exit.
static void
jit_chain_guard(enum jcc_kinds jcc, jitstate_t *jit, ctx_t *ctx, uint8_t depth_limit, uint8_t *side_exit)
{
branchgen_fn target0_gen_fn;
switch (jcc) {
case JCC_JNE:
case JCC_JNZ:
target0_gen_fn = gen_jnz_to_target0;
break;
case JCC_JZ:
case JCC_JE:
target0_gen_fn = gen_jz_to_target0;
break;
default:
RUBY_ASSERT(false && "unimplemented jump kind");
break;
};
if (ctx->chain_depth < depth_limit) {
ctx_t deeper = *ctx;
deeper.chain_depth++;
gen_branch(
ctx,
(blockid_t) { jit->iseq, jit->insn_idx },
&deeper,
BLOCKID_NULL,
NULL,
target0_gen_fn
);
}
else {
target0_gen_fn(cb, side_exit, NULL, SHAPE_DEFAULT);
}
}
bool rb_iv_index_tbl_lookup(struct st_table *iv_index_tbl, ID id, struct rb_iv_index_tbl_entry **ent); // vm_insnhelper.c
enum {
GETIVAR_MAX_DEPTH = 10 // up to 5 different classes, and embedded or not for each
};
static codegen_status_t
gen_getinstancevariable(jitstate_t* jit, ctx_t* ctx)
{
IVC ic = (IVC)jit_get_arg(jit, 1);
// Check that the inline cache has been set, slot index is known
if (!ic->entry) {
return YJIT_CANT_COMPILE;
}
// Defer compilation so we can peek at the topmost object
if (!jit_at_current_insn(jit))
{
// Defer compilation so we can specialize a runtime `self`
if (!jit_at_current_insn(jit)) {
defer_compilation(jit->block, jit->insn_idx, ctx);
return YJIT_END_BLOCK;
}
// Peek at the topmost value on the stack at compilation time
VALUE top_val = jit_peek_at_stack(jit, ctx);
// TODO: play with deferred compilation and sidechains! :)
// Specialize based on the compile-time self
VALUE self_val = jit_peek_at_self(jit, ctx);
VALUE self_klass = rb_class_of(self_val);
// Create a side exit to fall back to the interpreter
uint8_t *side_exit = yjit_side_exit(jit, ctx);
// If the class uses the default allocator, instances should all be T_OBJECT
// NOTE: This assumes nobody changes the allocator of the class after allocation.
// Eventually, we can encode whether an object is T_OBJECT or not
// inside object shapes.
if (rb_get_alloc_func(ic->entry->class_value) != rb_class_allocate_instance) {
return YJIT_CANT_COMPILE;
if (rb_get_alloc_func(self_klass) != rb_class_allocate_instance) {
jmp_ptr(cb, side_exit);
return YJIT_END_BLOCK;
}
RUBY_ASSERT(BUILTIN_TYPE(self_val) == T_OBJECT); // because we checked the allocator
ID id = (ID)jit_get_arg(jit, 0);
struct rb_iv_index_tbl_entry *ent;
struct st_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(self_val);
// Lookup index for the ivar the instruction loads
if (iv_index_tbl && rb_iv_index_tbl_lookup(iv_index_tbl, id, &ent)) {
uint32_t ivar_index = ent->index;
// Load self from CFP
mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
guard_self_is_object(cb, REG0, COUNTED_EXIT(side_exit, getivar_se_self_not_heap), ctx);
// Guard that self has a known class
x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
mov(cb, REG1, klass_opnd);
x86opnd_t serial_opnd = mem_opnd(64, REG1, offsetof(struct RClass, class_serial));
cmp(cb, serial_opnd, imm_opnd(RCLASS_SERIAL(self_klass)));
jit_chain_guard(JCC_JNE, jit, ctx, GETIVAR_MAX_DEPTH, side_exit);
// Compile time self is embedded and the ivar index is within the object
if (RB_FL_TEST_RAW(self_val, ROBJECT_EMBED) && ivar_index < ROBJECT_EMBED_LEN_MAX) {
// See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
// Guard that self is embedded
// TODO: BT and JC is shorter
x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
jit_chain_guard(JCC_JZ, jit, ctx, GETIVAR_MAX_DEPTH, side_exit);
// Load the variable
x86opnd_t ivar_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.ary) + ivar_index * SIZEOF_VALUE);
mov(cb, REG1, ivar_opnd);
// Guard that the variable is not Qundef
cmp(cb, REG1, imm_opnd(Qundef));
je_ptr(cb, COUNTED_EXIT(side_exit, getivar_undef));
// Push the ivar on the stack
x86opnd_t out_opnd = ctx_stack_push(ctx, T_NONE);
mov(cb, out_opnd, REG1);
}
else {
// Compile time self is *not* embedded.
// Guard that self is *not* embedded
// See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
jit_chain_guard(JCC_JNZ, jit, ctx, GETIVAR_MAX_DEPTH, side_exit);
// check that the extended table is big enough
if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
// Check that the slot is inside the extended table (num_slots > index)
x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
cmp(cb, num_slots, imm_opnd(ivar_index));
jle_ptr(cb, COUNTED_EXIT(side_exit, getivar_idx_out_of_range));
}
// Get a pointer to the extended table
x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
mov(cb, REG0, tbl_opnd);
// Read the ivar from the extended table
x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
mov(cb, REG0, ivar_opnd);
// Check that the ivar is not Qundef
cmp(cb, REG0, imm_opnd(Qundef));
je_ptr(cb, COUNTED_EXIT(side_exit, getivar_undef));
// Push the ivar on the stack
x86opnd_t out_opnd = ctx_stack_push(ctx, T_NONE);
mov(cb, out_opnd, REG0);
}
// Jump to next instruction. This allows guard chains to share the same successor.
{
ctx_t reset_depth = *ctx;
reset_depth.chain_depth = 0;
blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
// Generate the jump instruction
gen_direct_jump(
&reset_depth,
jump_block
);
}
return YJIT_END_BLOCK;
}
uint32_t ivar_index = ic->entry->index;
// Create a side exit to fall back to the interpreter
uint8_t* side_exit = yjit_side_exit(jit, ctx);
// Load self from CFP
mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
guard_self_is_object(cb, REG0, side_exit, ctx);
// Bail if receiver class is different from the compile-time call cache class
x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
mov(cb, REG1, klass_opnd);
x86opnd_t serial_opnd = mem_opnd(64, REG1, offsetof(struct RClass, class_serial));
cmp(cb, serial_opnd, imm_opnd(ic->entry->class_serial));
jne_ptr(cb, side_exit);
// Bail if the ivars are not on the extended table
// See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
jnz_ptr(cb, side_exit);
// check that the extended table is big enough
if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
// Check that the slot is inside the extended table (num_slots > index)
x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
cmp(cb, num_slots, imm_opnd(ivar_index));
jle_ptr(cb, side_exit);
}
// Get a pointer to the extended table
x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
mov(cb, REG0, tbl_opnd);
// Read the ivar from the extended table
x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
mov(cb, REG0, ivar_opnd);
// Check that the ivar is not Qundef
cmp(cb, REG0, imm_opnd(Qundef));
je_ptr(cb, side_exit);
// Push the ivar on the stack
x86opnd_t out_opnd = ctx_stack_push(ctx, T_NONE);
mov(cb, out_opnd, REG0);
return YJIT_KEEP_COMPILING;
// Take a side exit because returning YJIT_CANT_COMPILE can exit to a JIT
// entry point and form an infinite loop when chain_depth > 0.
jmp_ptr(cb, side_exit);
return YJIT_END_BLOCK;
}
static codegen_status_t
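
Both paths above mirror how struct RObject stores instance variables: an object with few ivars embeds them inline (the ROBJECT_EMBED flag is set), while a larger object keeps a slot count and a pointer to a heap-allocated table. A simplified, compilable sketch of that layout and of the read the generated code performs (toy struct, not the real header; 0 stands in for Qundef):

    typedef unsigned long VALUE;

    enum { TOY_EMBED_LEN_MAX = 3 }; // ROBJECT_EMBED_LEN_MAX is 3 on 64-bit builds

    // Reduced model of struct RObject's two storage modes.
    struct toy_object {
        int embedded; // models the ROBJECT_EMBED bit in RBasic.flags
        union {
            VALUE ary[TOY_EMBED_LEN_MAX]; // embedded: ivars stored inline
            struct {
                unsigned int numiv; // number of slots in the extended table
                VALUE *ivptr;       // extended: ivars live on the heap
            } heap;
        } as;
    };

    // The same decisions the JIT compiles: test the embed flag, bounds-check
    // the extended table, then load the slot.
    static VALUE toy_ivar_get(const struct toy_object *obj, unsigned int index)
    {
        if (obj->embedded) {
            return index < TOY_EMBED_LEN_MAX ? obj->as.ary[index] : 0;
        }
        if (index >= obj->as.heap.numiv) {
            return 0; // cf. the getivar_idx_out_of_range side exit
        }
        return obj->as.heap.ivptr[index];
    }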


@@ -10,7 +10,7 @@
#define MAX_VERSIONS 4
// Maximum number of branch instructions we can track
#define MAX_BRANCHES 32768
#define MAX_BRANCHES 100000
// Registered branch entries
branch_t branch_entries[MAX_BRANCHES];
@@ -344,7 +344,8 @@ uint8_t* gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_
// Called by the generated code when a branch stub is executed
// Triggers compilation of branches and code patching
uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx, rb_execution_context_t* ec)
static uint8_t *
branch_stub_hit(uint32_t branch_idx, uint32_t target_idx, rb_execution_context_t* ec)
{
uint8_t* dst_addr;
@@ -380,18 +381,18 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx, rb_execution_
ctx_t generic_ctx = DEFAULT_CTX;
generic_ctx.stack_size = target_ctx->stack_size;
generic_ctx.sp_offset = target_ctx->sp_offset;
if (get_num_versions(target) >= MAX_VERSIONS - 1)
{
//fprintf(stderr, "version limit hit in branch_stub_hit\n");
target_ctx = &generic_ctx;
if (target_ctx->chain_depth == 0) { // guard chains implement limits individually
if (get_num_versions(target) >= MAX_VERSIONS - 1) {
//fprintf(stderr, "version limit hit in branch_stub_hit\n");
target_ctx = &generic_ctx;
}
}
// Try to find a compiled version of this block
block_t* p_block = find_block_version(target, target_ctx);
// If this block hasn't yet been compiled
if (!p_block)
{
if (!p_block) {
p_block = gen_block_version(target, target_ctx, ec);
}
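
The new chain_depth == 0 test above exempts guard chains from the block-version cap: jit_chain_guard already bounds chain length with GETIVAR_MAX_DEPTH, and capping it here too would collapse a chain into the generic context before it reached its own limit. A toy rendering of the resulting policy (hypothetical names):

    enum { TOY_MAX_VERSIONS = 4 };

    struct toy_ctx {
        int chain_depth; // > 0 while inside a guard chain
        int specialized; // stands in for the rest of the type context
    };

    // Mirrors branch_stub_hit: cap how many specialized versions an
    // ordinary block gets, but let guard chains police their own depth.
    static struct toy_ctx pick_target_ctx(struct toy_ctx target, int num_versions)
    {
        if (target.chain_depth == 0 && num_versions >= TOY_MAX_VERSIONS - 1) {
            struct toy_ctx generic = { 0, 0 }; // type-agnostic fallback version
            return generic;
        }
        return target;
    }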


@@ -506,7 +506,9 @@ yjit_disasm_init(VALUE klass)
{
csh * handle;
VALUE disasm = TypedData_Make_Struct(klass, csh, &yjit_disasm_type, handle);
cs_open(CS_ARCH_X86, CS_MODE_64, handle);
if (cs_open(CS_ARCH_X86, CS_MODE_64, handle) != CS_ERR_OK) {
rb_raise(rb_eRuntimeError, "failed to make Capstone handle");
}
return disasm;
}
@@ -518,7 +520,7 @@ yjit_disasm(VALUE self, VALUE code, VALUE from)
cs_insn *insns;
TypedData_Get_Struct(self, csh, &yjit_disasm_type, handle);
count = cs_disasm(*handle, (uint8_t*)StringValuePtr(code), RSTRING_LEN(code), NUM2INT(from), 0, &insns);
count = cs_disasm(*handle, (uint8_t*)StringValuePtr(code), RSTRING_LEN(code), NUM2ULL(from), 0, &insns);
VALUE insn_list = rb_ary_new_capa(count);
for (size_t i = 0; i < count; i++) {
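
Both changes in this file feed cs_disasm's fourth parameter, the runtime address of the first byte being disassembled: the Ruby disassembler now passes the block's real address (via NUM2ULL rather than NUM2INT, so 64-bit addresses survive), which lets Capstone print jump targets as absolute addresses. A minimal standalone use of the same Capstone calls:

    #include <stdio.h>
    #include <capstone/capstone.h>

    int main(void)
    {
        uint8_t code[] = { 0xeb, 0xfe }; // jmp -2: an instruction that jumps to itself
        csh handle;
        if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK) {
            return 1;
        }
        cs_insn *insns;
        // Fourth argument: the address of code[0]. With a real address the
        // target prints as "jmp 0x10000" (lowercase) instead of "jmp 0".
        size_t count = cs_disasm(handle, code, sizeof(code), 0x10000, 0, &insns);
        for (size_t i = 0; i < count; i++) {
            printf("%08llx: %s\t%s\n", (unsigned long long)insns[i].address,
                   insns[i].mnemonic, insns[i].op_str);
        }
        if (count > 0) {
            cs_free(insns, count);
        }
        cs_close(&handle);
        return 0;
    }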


@@ -47,6 +47,10 @@ YJIT_DECLARE_COUNTERS(
leave_se_finish_frame,
leave_se_interrupt,
getivar_se_self_not_heap,
getivar_idx_out_of_range,
getivar_undef,
// Member with known name for iterating over counters
last_member
)
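
The trailing last_member is the standard sentinel idiom: the same token list declares both the struct fields and an enum, so the sentinel's value equals the number of counters and stats code can iterate over them generically. A reduced sketch of the trick (TOY_DECLARE_COUNTERS is hypothetical; the real macro is YJIT_DECLARE_COUNTERS):

    #include <stdio.h>

    // One token list declares both the counter storage and an iterable enum.
    #define TOY_DECLARE_COUNTERS(...)                     \
        struct toy_counters { long __VA_ARGS__; };        \
        enum toy_counter_id { __VA_ARGS__, last_member };

    TOY_DECLARE_COUNTERS(getivar_se_self_not_heap,
                         getivar_idx_out_of_range,
                         getivar_undef)

    static const char *const names[last_member] = {
        "getivar_se_self_not_heap",
        "getivar_idx_out_of_range",
        "getivar_undef",
    };

    int main(void)
    {
        struct toy_counters c = {0};
        c.getivar_undef = 7;
        const long *vals = (const long *)&c; // the fields are contiguous longs
        for (int i = 0; i < last_member; i++) { // sentinel value == count
            printf("%s: %ld\n", names[i], vals[i]);
        }
        return 0;
    }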