YJIT: lazy polymorphic getinstancevariable
Lazily compile out a chain of checks for different known classes and whether `self` embeds its ivars or not.

* Remove trailing whitespace
* Get proper address in Capstone disassembly
* Lowercase addresses in Capstone disassembly. Capstone uses lowercase for jump targets in generated listings, so match it.
* Use the same successor in getivar guard chains. Cuts down on duplication.
* Address reviews
* Fix copy-paste error
* Add a comment
parent 439db7b81b
commit 5d834bcf9f
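For orientation before the diffs: the guard-chain approach effectively gives one getinstancevariable call site a short, lazily grown cache keyed on (class, embedded-or-not), with a fallback once the depth limit is reached. Below is a minimal standalone C sketch of that idea only; every type, field, and constant in it (obj_t, guard_t, MAX_CHAIN_DEPTH, ...) is made up for illustration. The real implementation emits machine-code guards via jit_chain_guard and recompiles stubs lazily rather than walking a list at run time.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for a class serial and an object's ivar storage. */
typedef struct {
    unsigned long class_serial;
    bool embedded;          /* ivars stored inline vs. on a heap table */
    long ivars[3];
    long *heap_ivars;
} obj_t;

/* One compiled "case" of the call site: a class we have already seen,
 * plus whether its instances embed their ivars. */
typedef struct { unsigned long class_serial; bool embedded; } guard_t;

#define MAX_CHAIN_DEPTH 5   /* after this many specializations, stop growing */

typedef struct { guard_t chain[MAX_CHAIN_DEPTH]; size_t depth; } callsite_t;

/* Read ivar 0 via a matching guard if one exists; on a miss, either extend
 * the chain (the analogue of lazily compiling a new branch) or, once the
 * chain is full, just take the generic path. */
static long read_ivar(callsite_t *site, obj_t *obj)
{
    for (size_t i = 0; i < site->depth; i++) {
        guard_t *g = &site->chain[i];
        if (g->class_serial == obj->class_serial && g->embedded == obj->embedded) {
            /* specialized path for a previously seen (class, layout) pair */
            return obj->embedded ? obj->ivars[0] : obj->heap_ivars[0];
        }
    }
    if (site->depth < MAX_CHAIN_DEPTH) {
        site->chain[site->depth++] = (guard_t){ obj->class_serial, obj->embedded };
        printf("specialized for class %lu (embedded=%d)\n",
               obj->class_serial, (int)obj->embedded);
    }
    /* first miss compiles a new specialization; past the limit this stays generic */
    return obj->embedded ? obj->ivars[0] : obj->heap_ivars[0];
}

int main(void)
{
    long heap[1] = { 42 };
    obj_t a = { .class_serial = 1, .embedded = true,  .ivars = { 7 } };
    obj_t b = { .class_serial = 2, .embedded = false, .heap_ivars = heap };
    callsite_t site = { 0 };

    printf("%ld\n", read_ivar(&site, &a)); /* extends the chain for class 1 */
    printf("%ld\n", read_ivar(&site, &b)); /* extends it again for class 2 */
    printf("%ld\n", read_ivar(&site, &a)); /* hits the existing guard */
    return 0;
}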
@@ -301,3 +301,63 @@ assert_equal "good", %q{
  foo
}

# Test polymorphic getinstancevariable. T_OBJECT -> T_STRING
assert_equal 'ok', %q{
  @hello = @h1 = @h2 = @h3 = @h4 = 'ok'
  str = ""
  str.instance_variable_set(:@hello, 'ok')

  public def get
    @hello
  end

  get
  get
  str.get
  str.get
}

# Test polymorphic getinstancevariable, two different classes
assert_equal 'ok', %q{
  class Embedded
    def initialize
      @ivar = 0
    end

    def get
      @ivar
    end
  end

  class Extended < Embedded
    def initialize
      @v1 = @v2 = @v3 = @v4 = @ivar = 'ok'
    end
  end

  embed = Embedded.new
  extend = Extended.new

  embed.get
  embed.get
  extend.get
  extend.get
}

# Test megamorphic getinstancevariable
assert_equal 'ok', %q{
  parent = Class.new do
    def initialize
      @hello = @h1 = @h2 = @h3 = @h4 = 'ok'
    end

    def get
      @hello
    end
  end

  subclasses = 300.times.map { Class.new(parent) }
  subclasses.each { _1.new.get }
  parent.new.get
}
@@ -21,6 +21,5 @@ RubyVM::Instructions = RubyVM::BareInstructions.to_a + \
                       RubyVM::MicroJIT::ExampleInstructions.to_a

require_relative 'trace_instructions'
RubyVM::Instructions.freeze
@@ -1099,6 +1099,12 @@ iv_index_tbl_lookup(struct st_table *iv_index_tbl, ID id, struct rb_iv_index_tbl
    return found ? true : false;
}

bool
rb_iv_index_tbl_lookup(struct st_table *iv_index_tbl, ID id, struct rb_iv_index_tbl_entry **ent)
{
    return iv_index_tbl_lookup(iv_index_tbl, id, ent);
}

ALWAYS_INLINE(static void fill_ivar_cache(const rb_iseq_t *iseq, IVC ic, const struct rb_callcache *cc, int is_attr, struct rb_iv_index_tbl_entry *ent));

static inline void
yjit.rb (9 changed lines)
@@ -14,13 +14,13 @@ module YJIT

      # Sort the blocks by increasing addresses
      blocks.sort_by(&:address).each_with_index do |block, i|
        str << "== BLOCK #{i+1}/#{blocks.length}: #{block.code.length} BYTES, ISEQ RANGE [#{block.iseq_start_index},#{block.iseq_end_index}[ ".ljust(80, "=")
        str << "== BLOCK #{i+1}/#{blocks.length}: #{block.code.length} BYTES, ISEQ RANGE [#{block.iseq_start_index},#{block.iseq_end_index}] ".ljust(80, "=")
        str << "\n"

        cs.disasm(block.code, 0).each do |i|
        cs.disasm(block.code, block.address).each do |i|
          str << sprintf(
            " %<address>08X: %<instruction>s\t%<details>s\n",
            address: block.address + i.address,
            " %<address>08x: %<instruction>s\t%<details>s\n",
            address: i.address,
            instruction: i.mnemonic,
            details: i.op_str
          )

@@ -62,6 +62,7 @@ module YJIT

      print_counters(counters, prefix: 'oswb_', prompt: 'opt_send_without_block exit reasons: ')
      print_counters(counters, prefix: 'leave_', prompt: 'leave exit reasons: ')
      print_counters(counters, prefix: 'getivar_', prompt: 'getinstancevariable exit reasons:')
    end

    def print_counters(counters, prefix:, prompt:)
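For context on the two disassembly fixes above: Capstone takes the buffer's load address as the fourth argument of cs_disasm, and each returned cs_insn.address is then already absolute, so nothing needs to be added back when printing; lowercase hex matches how Capstone itself renders jump targets. A small standalone sketch using Capstone's C API follows (the code bytes and base address are arbitrary examples, not values from YJIT):

#include <capstone/capstone.h>
#include <stdio.h>

int main(void)
{
    /* mov eax, 1; ret -- arbitrary example bytes */
    const uint8_t code[] = { 0xb8, 0x01, 0x00, 0x00, 0x00, 0xc3 };
    const uint64_t load_addr = 0x7f0000001000ULL; /* pretend block address */

    csh handle;
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK)
        return 1;

    cs_insn *insns;
    /* Passing load_addr here makes insn->address absolute already. */
    size_t count = cs_disasm(handle, code, sizeof(code), load_addr, 0, &insns);
    for (size_t i = 0; i < count; i++) {
        /* Lowercase hex, matching how Capstone prints jump targets. */
        printf("%08llx: %s\t%s\n",
               (unsigned long long)insns[i].address,
               insns[i].mnemonic, insns[i].op_str);
    }
    cs_free(insns, count);
    cs_close(&handle);
    return 0;
}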
yjit_codegen.c (285 changed lines)
@@ -7,6 +7,7 @@
#include "builtin.h"
#include "internal/compile.h"
#include "internal/class.h"
#include "internal/object.h"
#include "insns_info.inc"
#include "yjit.h"
#include "yjit_iface.h"

@@ -99,6 +100,12 @@ jit_peek_at_stack(jitstate_t* jit, ctx_t* ctx)
    return *(sp - 1);
}

static VALUE
jit_peek_at_self(jitstate_t *jit, ctx_t *ctx)
{
    return jit->ec->cfp->self;
}

// Save YJIT registers prior to a C call
static void
yjit_save_regs(codeblock_t* cb)

@@ -564,102 +571,226 @@ guard_self_is_object(codeblock_t *cb, x86opnd_t self_opnd, uint8_t *side_exit, c
        je_ptr(cb, side_exit);
        cmp(cb, self_opnd, imm_opnd(Qnil));
        je_ptr(cb, side_exit);

        // maybe we can do
        // RUBY_ASSERT(Qfalse < Qnil);
        // cmp(cb, self_opnd, imm_opnd(Qnil));
        // jbe(cb, side_exit);

        ctx->self_is_object = true;
    }
}

static void
gen_jnz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
{
    switch (shape)
    {
      case SHAPE_NEXT0:
      case SHAPE_NEXT1:
        RUBY_ASSERT(false);
        break;

      case SHAPE_DEFAULT:
        jnz_ptr(cb, target0);
        break;
    }
}

static void
gen_jz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
{
    switch (shape)
    {
      case SHAPE_NEXT0:
      case SHAPE_NEXT1:
        RUBY_ASSERT(false);
        break;

      case SHAPE_DEFAULT:
        jz_ptr(cb, target0);
        break;
    }
}

enum jcc_kinds {
    JCC_JNE,
    JCC_JNZ,
    JCC_JZ,
    JCC_JE,
};

// Generate a jump to a stub that recompiles the current YARV instruction on failure.
// When depth_limit is exceeded, generate a jump to a side exit.
static void
jit_chain_guard(enum jcc_kinds jcc, jitstate_t *jit, ctx_t *ctx, uint8_t depth_limit, uint8_t *side_exit)
{
    branchgen_fn target0_gen_fn;

    switch (jcc) {
      case JCC_JNE:
      case JCC_JNZ:
        target0_gen_fn = gen_jnz_to_target0;
        break;
      case JCC_JZ:
      case JCC_JE:
        target0_gen_fn = gen_jz_to_target0;
        break;
      default:
        RUBY_ASSERT(false && "unimplemented jump kind");
        break;
    };

    if (ctx->chain_depth < depth_limit) {
        ctx_t deeper = *ctx;
        deeper.chain_depth++;

        gen_branch(
            ctx,
            (blockid_t) { jit->iseq, jit->insn_idx },
            &deeper,
            BLOCKID_NULL,
            NULL,
            target0_gen_fn
        );
    }
    else {
        target0_gen_fn(cb, side_exit, NULL, SHAPE_DEFAULT);
    }
}

bool rb_iv_index_tbl_lookup(struct st_table *iv_index_tbl, ID id, struct rb_iv_index_tbl_entry **ent); // vm_insnhelper.c

enum {
    GETIVAR_MAX_DEPTH = 10 // up to 5 different classes, and embedded or not for each
};

static codegen_status_t
gen_getinstancevariable(jitstate_t* jit, ctx_t* ctx)
{
    IVC ic = (IVC)jit_get_arg(jit, 1);

    // Check that the inline cache has been set, slot index is known
    if (!ic->entry) {
        return YJIT_CANT_COMPILE;
    }

    // Defer compilation so we can peek at the topmost object
    if (!jit_at_current_insn(jit))
    {
    // Defer compilation so we can specialize a runtime `self`
    if (!jit_at_current_insn(jit)) {
        defer_compilation(jit->block, jit->insn_idx, ctx);
        return YJIT_END_BLOCK;
    }

    // Peek at the topmost value on the stack at compilation time
    VALUE top_val = jit_peek_at_stack(jit, ctx);

    // TODO: play with deferred compilation and sidechains! :)

    // Specialize based on the compile-time self
    VALUE self_val = jit_peek_at_self(jit, ctx);
    VALUE self_klass = rb_class_of(self_val);

    // Create a side-exit to fall back to the interpreter
    uint8_t *side_exit = yjit_side_exit(jit, ctx);

    // If the class uses the default allocator, instances should all be T_OBJECT
    // NOTE: This assumes nobody changes the allocator of the class after allocation.
    //       Eventually, we can encode whether an object is T_OBJECT or not
    //       inside object shapes.
    if (rb_get_alloc_func(ic->entry->class_value) != rb_class_allocate_instance) {
        return YJIT_CANT_COMPILE;
    if (rb_get_alloc_func(self_klass) != rb_class_allocate_instance) {
        jmp_ptr(cb, side_exit);
        return YJIT_END_BLOCK;
    }
    RUBY_ASSERT(BUILTIN_TYPE(self_val) == T_OBJECT); // because we checked the allocator

    ID id = (ID)jit_get_arg(jit, 0);
    struct rb_iv_index_tbl_entry *ent;
    struct st_table *iv_index_tbl = ROBJECT_IV_INDEX_TBL(self_val);

    // Lookup index for the ivar the instruction loads
    if (iv_index_tbl && rb_iv_index_tbl_lookup(iv_index_tbl, id, &ent)) {
        uint32_t ivar_index = ent->index;

        // Load self from CFP
        mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));

        guard_self_is_object(cb, REG0, COUNTED_EXIT(side_exit, getivar_se_self_not_heap), ctx);

        // Guard that self has a known class
        x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
        mov(cb, REG1, klass_opnd);
        x86opnd_t serial_opnd = mem_opnd(64, REG1, offsetof(struct RClass, class_serial));
        cmp(cb, serial_opnd, imm_opnd(RCLASS_SERIAL(self_klass)));
        jit_chain_guard(JCC_JNE, jit, ctx, GETIVAR_MAX_DEPTH, side_exit);

        // Compile-time self is embedded and the ivar index is within the object
        if (RB_FL_TEST_RAW(self_val, ROBJECT_EMBED) && ivar_index < ROBJECT_EMBED_LEN_MAX) {
            // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h

            // Guard that self is embedded
            // TODO: BT and JC is shorter
            x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
            test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
            jit_chain_guard(JCC_JZ, jit, ctx, GETIVAR_MAX_DEPTH, side_exit);

            // Load the variable
            x86opnd_t ivar_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.ary) + ivar_index * SIZEOF_VALUE);
            mov(cb, REG1, ivar_opnd);

            // Guard that the variable is not Qundef
            cmp(cb, REG1, imm_opnd(Qundef));
            je_ptr(cb, COUNTED_EXIT(side_exit, getivar_undef));

            // Push the ivar on the stack
            x86opnd_t out_opnd = ctx_stack_push(ctx, T_NONE);
            mov(cb, out_opnd, REG1);
        }
        else {
            // Compile-time self is *not* embedded.

            // Guard that self is *not* embedded
            // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
            x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
            test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
            jit_chain_guard(JCC_JNZ, jit, ctx, GETIVAR_MAX_DEPTH, side_exit);

            // Check that the extended table is big enough
            if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
                // Check that the slot is inside the extended table (num_slots > index)
                x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
                cmp(cb, num_slots, imm_opnd(ivar_index));
                jle_ptr(cb, COUNTED_EXIT(side_exit, getivar_idx_out_of_range));
            }

            // Get a pointer to the extended table
            x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
            mov(cb, REG0, tbl_opnd);

            // Read the ivar from the extended table
            x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
            mov(cb, REG0, ivar_opnd);

            // Check that the ivar is not Qundef
            cmp(cb, REG0, imm_opnd(Qundef));
            je_ptr(cb, COUNTED_EXIT(side_exit, getivar_undef));

            // Push the ivar on the stack
            x86opnd_t out_opnd = ctx_stack_push(ctx, T_NONE);
            mov(cb, out_opnd, REG0);
        }

        // Jump to next instruction. This allows guard chains to share the same successor.
        {
            ctx_t reset_depth = *ctx;
            reset_depth.chain_depth = 0;

            blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };

            // Generate the jump instruction
            gen_direct_jump(
                &reset_depth,
                jump_block
            );
        }

        return YJIT_END_BLOCK;
    }

    uint32_t ivar_index = ic->entry->index;

    // Create a side-exit to fall back to the interpreter
    uint8_t* side_exit = yjit_side_exit(jit, ctx);

    // Load self from CFP
    mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));

    guard_self_is_object(cb, REG0, side_exit, ctx);

    // Bail if receiver class is different from compile-time call cache class
    x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
    mov(cb, REG1, klass_opnd);
    x86opnd_t serial_opnd = mem_opnd(64, REG1, offsetof(struct RClass, class_serial));
    cmp(cb, serial_opnd, imm_opnd(ic->entry->class_serial));
    jne_ptr(cb, side_exit);

    // Bail if the ivars are not on the extended table
    // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
    x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
    test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
    jnz_ptr(cb, side_exit);

    // Check that the extended table is big enough
    if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
        // Check that the slot is inside the extended table (num_slots > index)
        x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
        cmp(cb, num_slots, imm_opnd(ivar_index));
        jle_ptr(cb, side_exit);
    }

    // Get a pointer to the extended table
    x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
    mov(cb, REG0, tbl_opnd);

    // Read the ivar from the extended table
    x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
    mov(cb, REG0, ivar_opnd);

    // Check that the ivar is not Qundef
    cmp(cb, REG0, imm_opnd(Qundef));
    je_ptr(cb, side_exit);

    // Push the ivar on the stack
    x86opnd_t out_opnd = ctx_stack_push(ctx, T_NONE);
    mov(cb, out_opnd, REG0);

    return YJIT_KEEP_COMPILING;
    // Take a side exit because YJIT_CANT_COMPILE can exit to a JIT entry point and
    // form an infinite loop when chain_depth > 0.
    jmp_ptr(cb, side_exit);
    return YJIT_END_BLOCK;
}

static codegen_status_t
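The two branches in the generated code above mirror how a T_OBJECT stores instance variables: small objects keep a fixed-size inline array of VALUEs (the ROBJECT_EMBED case, read from as.ary), while larger ones keep a count and a pointer to a heap-allocated table (as.heap.numiv / as.heap.ivptr). Below is a simplified standalone C model of that layout and of the index check the generated code performs; the struct, the embed capacity, and the UNDEF sentinel are illustrative stand-ins, not Ruby's actual headers.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define EMBED_LEN_MAX 3          /* stand-in for ROBJECT_EMBED_LEN_MAX */
#define UNDEF ((intptr_t)-1)     /* stand-in for Qundef */

/* Simplified model of struct RObject: either an inline array or a heap table. */
typedef struct {
    bool embedded;               /* stand-in for the ROBJECT_EMBED flag bit */
    union {
        intptr_t ary[EMBED_LEN_MAX];                       /* like as.ary */
        struct { uint32_t numiv; intptr_t *ivptr; } heap;  /* like as.heap */
    } as;
} object_t;

/* What the specialized paths effectively do once the ivar index is known:
 * pick the embedded or extended storage, bounds-check the extended table,
 * and treat UNDEF as "not set". */
static intptr_t get_ivar(const object_t *obj, uint32_t ivar_index)
{
    intptr_t val;
    if (obj->embedded) {
        val = obj->as.ary[ivar_index];
    } else {
        if (ivar_index >= obj->as.heap.numiv)
            return 0;            /* out of range: the JIT side-exits here */
        val = obj->as.heap.ivptr[ivar_index];
    }
    return val == UNDEF ? 0 : val;  /* 0 plays the role of nil in this sketch */
}

int main(void)
{
    object_t small = { .embedded = true };
    small.as.ary[0] = 7;

    intptr_t *tbl = calloc(8, sizeof(*tbl));
    tbl[5] = 42;
    object_t big = { .embedded = false };
    big.as.heap.numiv = 8;
    big.as.heap.ivptr = tbl;

    printf("%ld %ld\n", (long)get_ivar(&small, 0), (long)get_ivar(&big, 5));
    free(tbl);
    return 0;
}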
yjit_core.c (17 changed lines)
@@ -10,7 +10,7 @@
#define MAX_VERSIONS 4

// Maximum number of branch instructions we can track
#define MAX_BRANCHES 32768
#define MAX_BRANCHES 100000

// Registered branch entries
branch_t branch_entries[MAX_BRANCHES];

@@ -344,7 +344,8 @@ uint8_t* gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_

// Called by the generated code when a branch stub is executed
// Triggers compilation of branches and code patching
uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx, rb_execution_context_t* ec)
static uint8_t *
branch_stub_hit(uint32_t branch_idx, uint32_t target_idx, rb_execution_context_t* ec)
{
    uint8_t* dst_addr;

@@ -380,18 +381,18 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx, rb_execution_
    ctx_t generic_ctx = DEFAULT_CTX;
    generic_ctx.stack_size = target_ctx->stack_size;
    generic_ctx.sp_offset = target_ctx->sp_offset;
    if (get_num_versions(target) >= MAX_VERSIONS - 1)
    {
        //fprintf(stderr, "version limit hit in branch_stub_hit\n");
        target_ctx = &generic_ctx;
    if (target_ctx->chain_depth == 0) { // guard chains implement limits individually
        if (get_num_versions(target) >= MAX_VERSIONS - 1) {
            //fprintf(stderr, "version limit hit in branch_stub_hit\n");
            target_ctx = &generic_ctx;
        }
    }

    // Try to find a compiled version of this block
    block_t* p_block = find_block_version(target, target_ctx);

    // If this block hasn't yet been compiled
    if (!p_block)
    {
    if (!p_block) {
        p_block = gen_block_version(target, target_ctx, ec);
    }

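The reshuffled check above means block-version limiting now only applies outside guard chains: a block compiled with chain_depth > 0 is already bounded by the chain's own depth limit (GETIVAR_MAX_DEPTH in the codegen change), so it is exempt from the MAX_VERSIONS fallback to a generic context. A tiny hypothetical mirror of that decision, just to make the control flow explicit (this is not the actual branch_stub_hit code):

#include <stdbool.h>
#include <stdio.h>

#define MAX_VERSIONS 4

/* Fall back to a generic context only for ordinary blocks; blocks inside a
 * guard chain (chain_depth > 0) limit their own growth via the chain depth. */
static bool should_use_generic_ctx(int chain_depth, int num_versions)
{
    if (chain_depth != 0)
        return false;                     /* guard chains limit themselves */
    return num_versions >= MAX_VERSIONS - 1;
}

int main(void)
{
    printf("%d\n", should_use_generic_ctx(0, 5));  /* 1: too many versions */
    printf("%d\n", should_use_generic_ctx(3, 5));  /* 0: inside a guard chain */
    return 0;
}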
@@ -506,7 +506,9 @@ yjit_disasm_init(VALUE klass)
{
    csh * handle;
    VALUE disasm = TypedData_Make_Struct(klass, csh, &yjit_disasm_type, handle);
    cs_open(CS_ARCH_X86, CS_MODE_64, handle);
    if (cs_open(CS_ARCH_X86, CS_MODE_64, handle) != CS_ERR_OK) {
        rb_raise(rb_eRuntimeError, "failed to make Capstone handle");
    }
    return disasm;
}

@@ -518,7 +520,7 @@ yjit_disasm(VALUE self, VALUE code, VALUE from)
    cs_insn *insns;

    TypedData_Get_Struct(self, csh, &yjit_disasm_type, handle);
    count = cs_disasm(*handle, (uint8_t*)StringValuePtr(code), RSTRING_LEN(code), NUM2INT(from), 0, &insns);
    count = cs_disasm(*handle, (uint8_t*)StringValuePtr(code), RSTRING_LEN(code), NUM2ULL(from), 0, &insns);
    VALUE insn_list = rb_ary_new_capa(count);

    for (size_t i = 0; i < count; i++) {
@@ -47,6 +47,10 @@ YJIT_DECLARE_COUNTERS(
    leave_se_finish_frame,
    leave_se_interrupt,

    getivar_se_self_not_heap,
    getivar_idx_out_of_range,
    getivar_undef,

    // Member with known name for iterating over counters
    last_member
)