uJIT: Implement opt_getinlinecache
* ujit: implement opt_getinlinecache

  Aggressively bet that writes to constants don't happen and invalidate all
  opt_getinlinecache blocks on any and all constant writes. Use alignment
  padding on block_t to track this assumption. No change to sizeof(block_t).

* Fix compile warnings when not RUBY_DEBUG

* Fix reversed condition

* Switch to st_table to keep track of assumptions

Co-authored-by: Aaron Patterson <aaron.patterson@gmail.com>
Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
commit 57977ba30d (parent f93f3d6aa1)
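The scheme the message describes (a global registry of blocks that bet on constants never being written, flushed wholesale on any constant write) can be modeled in a few lines of C. The sketch below is a minimal standalone illustration, not the ujit code itself: the fixed-size array stands in for the st_table keyed by block pointers that this commit adds, and all names are hypothetical.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct block { bool invalidated; } block_t;

#define MAX_BLOCKS 64
static block_t *blocks_assuming_stable_constants[MAX_BLOCKS];
static size_t num_assuming = 0;

// Counterpart of assume_stable_global_constant_state(): record the bet,
// or refuse so the caller falls back to the interpreter.
static bool
assume_stable_constants(block_t *block)
{
    if (num_assuming == MAX_BLOCKS) return false;
    blocks_assuming_stable_constants[num_assuming++] = block;
    return true;
}

// Counterpart of rb_ujit_constant_state_changed(): any write, to any
// constant, invalidates every registered block.
static void
constant_state_changed(void)
{
    for (size_t i = 0; i < num_assuming; i++)
        blocks_assuming_stable_constants[i]->invalidated = true;
    num_assuming = 0;
}

int main(void)
{
    block_t b = { false };
    if (assume_stable_constants(&b)) {
        // ...emit code that pushes the cached constant as an immediate...
    }
    constant_state_changed(); // e.g. `FOO = 2` executed somewhere
    printf("block invalidated: %s\n", b.invalidated ? "yes" : "no");
    return 0;
}

The aggressive bet is paid for only on the rare invalidation path; the happy path (no constant ever written after warm-up) never re-checks anything at run time.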
common.mk
@@ -10590,6 +10590,7 @@ ractor.$(OBJEXT): {$(VPATH)}thread.h
 ractor.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h
 ractor.$(OBJEXT): {$(VPATH)}thread_native.h
 ractor.$(OBJEXT): {$(VPATH)}transient_heap.h
+ractor.$(OBJEXT): {$(VPATH)}ujit.h
 ractor.$(OBJEXT): {$(VPATH)}variable.h
 ractor.$(OBJEXT): {$(VPATH)}vm_core.h
 ractor.$(OBJEXT): {$(VPATH)}vm_debug.h

ractor.c (2 lines changed)
@@ -16,6 +16,7 @@
 #include "variable.h"
 #include "gc.h"
 #include "transient_heap.h"
+#include "ujit.h"
 
 VALUE rb_cRactor;
 
@@ -1604,6 +1605,7 @@ ractor_create(rb_execution_context_t *ec, VALUE self, VALUE loc, VALUE name, VAL
     r->verbose = cr->verbose;
     r->debug = cr->debug;
 
+    rb_ujit_before_ractor_spawn();
     rb_thread_create_ractor(r, args, block);
 
     RB_GC_GUARD(rv);

ujit.h (1 line changed)
@@ -56,5 +56,6 @@ void rb_ujit_constant_state_changed(void);
 void rb_ujit_iseq_mark(const struct rb_iseq_constant_body *body);
 void rb_ujit_iseq_update_references(const struct rb_iseq_constant_body *body);
 void rb_ujit_iseq_free(const struct rb_iseq_constant_body *body);
+void rb_ujit_before_ractor_spawn(void);
 
 #endif // #ifndef UJIT_H

ujit_codegen.c
@@ -60,19 +60,20 @@ jit_get_arg(jitstate_t* jit, size_t arg_idx)
     return *(jit->pc + arg_idx + 1);
 }
 
-// Load a pointer to a GC'd object into a register and keep track of the reference
+// Load a VALUE into a register and keep track of the reference if it is on the GC heap.
 static void
 jit_mov_gc_ptr(jitstate_t* jit, codeblock_t* cb, x86opnd_t reg, VALUE ptr)
 {
     RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
-    RUBY_ASSERT(!SPECIAL_CONST_P(ptr));
 
     mov(cb, reg, const_ptr_opnd((void*)ptr));
     // The pointer immediate is encoded as the last part of the mov written out.
     uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
 
-    if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
-        rb_bug("allocation failed");
+    if (!SPECIAL_CONST_P(ptr)) {
+        if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
+            rb_bug("allocation failed");
+        }
     }
 }
 
@@ -252,12 +253,14 @@ ujit_gen_block(ctx_t* ctx, block_t* block)
             break;
         }
 
+#if RUBY_DEBUG
         // Accumulate stats about instructions executed
         if (rb_ujit_opts.gen_stats) {
            // Count instructions executed by the JIT
            mov(cb, REG0, const_ptr_opnd((void *)&rb_ujit_exec_insns_count));
            add(cb, mem_opnd(64, REG0, 0), imm_opnd(1));
        }
+#endif
 
        //fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
        //print_str(cb, insn_name(opcode));
@@ -1115,6 +1118,7 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c
     // Pointer to the klass field of the receiver &(recv->klass)
     x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
 
+    // FIXME: This leaks when st_insert raises NoMemoryError
     assume_method_lookup_stable(cd->cc, cme, jit->block);
 
     // Bail if receiver class is different from compile-time call cache class
@@ -1570,6 +1574,48 @@ gen_leave(jitstate_t* jit, ctx_t* ctx)
     return true;
 }
 
+RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
+static bool
+gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx)
+{
+    VALUE jump_offset = jit_get_arg(jit, 0);
+    VALUE const_cache_as_value = jit_get_arg(jit, 1);
+    IC ic = (IC)const_cache_as_value;
+
+    // See vm_ic_hit_p().
+    struct iseq_inline_constant_cache_entry *ice = ic->entry;
+    if (!ice) return false; // cache not filled
+    if (ice->ic_serial != ruby_vm_global_constant_state) {
+        // Cache miss at compile time.
+        return false;
+    }
+    if (ice->ic_cref) {
+        // Only compile for caches that don't care about lexical scope.
+        return false;
+    }
+
+    // Optimize for single ractor mode.
+    // FIXME: This leaks when st_insert raises NoMemoryError
+    if (!assume_single_ractor_mode(jit->block)) return false;
+
+    // Invalidate output code on any and all constant writes
+    // FIXME: This leaks when st_insert raises NoMemoryError
+    if (!assume_stable_global_constant_state(jit->block)) return false;
+
+    x86opnd_t stack_top = ctx_stack_push(ctx, T_NONE);
+    jit_mov_gc_ptr(jit, cb, REG0, ice->value);
+    mov(cb, stack_top, REG0);
+
+    // Jump over the code for filling the cache
+    uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
+    gen_direct_jump(
+        ctx,
+        (blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
+    );
+
+    return true;
+}
+
 void ujit_reg_op(int opcode, codegen_fn gen_fn, bool is_branch)
 {
     // Check that the op wasn't previously registered
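For contrast: the interpreter's opt_getinlinecache re-validates ic_serial against ruby_vm_global_constant_state every time it runs, while the code generated above performs no runtime check at all; the cached VALUE becomes an immediate in the instruction stream and stays valid until the block is invalidated. A rough standalone model of that difference, with hypothetical names rather than VM internals:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t serial_t;
static serial_t global_constant_serial = 1;

struct const_cache { serial_t serial; long value; };

static long slow_lookup(void) { return 42; } // stand-in for a real constant lookup

// Interpreter-style fast path: check the serial on every execution.
static long
interp_getinlinecache(struct const_cache *ic)
{
    if (ic->serial == global_constant_serial)
        return ic->value;            // cache hit
    ic->value = slow_lookup();       // cache miss: refill
    ic->serial = global_constant_serial;
    return ic->value;
}

// JIT-style fast path: the value is baked in (think `mov reg, 42` in the
// emitted machine code); a constant write discards this code entirely
// instead of failing a per-run check.
static long
jitted_getinlinecache(void)
{
    return 42;
}

int main(void)
{
    struct const_cache ic = { 1, 42 };
    printf("%ld %ld\n", interp_getinlinecache(&ic), jitted_getinlinecache());
    return 0;
}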
@@ -1620,6 +1666,9 @@ ujit_init_codegen(void)
     ujit_reg_op(BIN(opt_and), gen_opt_and, false);
     ujit_reg_op(BIN(opt_minus), gen_opt_minus, false);
     ujit_reg_op(BIN(opt_plus), gen_opt_plus, false);
+
+    // Map branch instruction opcodes to codegen functions
+    ujit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache, true);
     ujit_reg_op(BIN(branchif), gen_branchif, true);
     ujit_reg_op(BIN(branchunless), gen_branchunless, true);
     ujit_reg_op(BIN(jump), gen_jump, true);

ujit_core.c
@@ -175,8 +175,10 @@ add_block_version(blockid_t blockid, block_t* block)
             rb_bug("allocation failed");
         }
 
+#if RUBY_DEBUG
         // First block compiled for this iseq
         rb_compiled_iseq_count++;
+#endif
     }
 
     block_t *first_version = get_first_version(iseq, blockid.idx);
@@ -199,7 +201,7 @@ add_block_version(blockid_t blockid, block_t* block)
     RB_OBJ_WRITTEN(iseq, Qundef, block->dependencies.cc);
     RB_OBJ_WRITTEN(iseq, Qundef, block->dependencies.cme);
 
-    // Run write barrier for all objects in generated code.
+    // Run write barriers for all objects in generated code.
     uint32_t *offset_element;
     rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) {
         uint32_t offset_to_value = *offset_element;
@@ -601,9 +603,12 @@ void
 ujit_free_block(block_t *block)
 {
     ujit_unlink_method_lookup_dependency(block);
+    ujit_block_assumptions_free(block);
 
     rb_darray_free(block->incoming);
-    free(block);
+    rb_darray_free(block->gc_object_offsets);
+
+    free(block);
 }
 
 // Invalidate one specific block version

ujit_core.h
@@ -107,9 +107,6 @@ typedef struct ujit_block_version
     // Bytecode sequence (iseq, idx) this is a version of
     blockid_t blockid;
 
-    // Index one past the last instruction in the iseq
-    uint32_t end_idx;
-
     // Context at the start of the block
     ctx_t ctx;
 
@@ -120,6 +117,9 @@ typedef struct ujit_block_version
     // List of incoming branches indices
     int32_array_t incoming;
 
+    // Offsets for GC managed objects in the mainline code block
+    int32_array_t gc_object_offsets;
+
     // Next block version for this blockid (singly-linked list)
     struct ujit_block_version *next;
 
@@ -132,6 +132,9 @@ typedef struct ujit_block_version
         VALUE cme;
         VALUE iseq;
     } dependencies;
+
+    // Index one past the last instruction in the iseq
+    uint32_t end_idx;
 } block_t;
 
 // Context object methods
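The block_t rearrangement above ties into the commit message's note about alignment padding and unchanged sizeof(block_t): a lone 4-byte field next to pointer-sized members either opens a hole or fills one, so fields can move (or even be added) without growing the struct. A standalone illustration of the effect on a typical LP64 ABI; the fields are simplified stand-ins, not the real block_t layout:

#include <stdint.h>
#include <stdio.h>

struct without_extra_field {
    void *ptr_a;      // 8 bytes
    uint32_t flags;   // 4 bytes, followed by 4 bytes of tail padding
};                    // sizeof == 16 on LP64: alignment rounds up to 8

struct with_field_in_padding {
    void *ptr_a;
    uint32_t flags;
    uint32_t end_idx; // lands in the former tail padding: still 16 bytes
};

int main(void)
{
    // Typically prints "16 16" on x86-64 and AArch64.
    printf("%zu %zu\n",
           sizeof(struct without_extra_field),
           sizeof(struct with_field_in_padding));
    return 0;
}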
ujit_iface.c (123 lines changed)
@@ -24,10 +24,12 @@ VALUE cUjitBlock;
 VALUE cUjitDisasm;
 VALUE cUjitDisasmInsn;
 
+#if RUBY_DEBUG
 static int64_t vm_insns_count = 0;
 int64_t rb_ujit_exec_insns_count = 0;
 static int64_t exit_op_count[VM_INSTRUCTION_SIZE] = { 0 };
 int64_t rb_compiled_iseq_count = 0;
+#endif
 
 // Machine code blocks (executable memory)
 extern codeblock_t *cb;
@@ -45,7 +47,7 @@ static const rb_data_type_t ujit_block_type = {
 };
 
 // Write the uJIT entry point pre-call bytes
 void
 cb_write_pre_call_bytes(codeblock_t* cb)
 {
     for (size_t i = 0; i < sizeof(ujit_with_ec_pre_call_bytes); ++i)

@@ -53,7 +55,7 @@ cb_write_pre_call_bytes(codeblock_t* cb)
 }
 
 // Write the uJIT exit post-call bytes
 void
 cb_write_post_call_bytes(codeblock_t* cb)
 {
     for (size_t i = 0; i < sizeof(ujit_with_ec_post_call_bytes); ++i)
@@ -129,46 +131,74 @@ struct ujit_root_struct {
     int unused; // empty structs are not legal in C99
 };
 
-// Map cme_or_cc => [[iseq, offset]]. An entry in the map means compiled code at iseq[offset]
-// is only valid when cme_or_cc is valid
+static void
+block_array_shuffle_remove(rb_ujit_block_array_t blocks, block_t *to_remove) {
+    block_t **elem;
+    rb_darray_foreach(blocks, i, elem) {
+        if (*elem == to_remove) {
+            // Remove the current element by moving the last element here then popping.
+            *elem = rb_darray_get(blocks, rb_darray_size(blocks) - 1);
+            rb_darray_pop_back(blocks);
+            break;
+        }
+    }
+}
+
+// Map cme_or_cc => [block]
 static st_table *method_lookup_dependency;
 
-struct compiled_region {
-    block_t *block;
-};
-
-typedef rb_darray(struct compiled_region) block_array_t;
-
 static int
 add_lookup_dependency_i(st_data_t *key, st_data_t *value, st_data_t data, int existing)
 {
-    struct compiled_region *region = (struct compiled_region *)data;
+    block_t *new_block = (block_t *)data;
 
-    block_array_t regions = NULL;
+    rb_ujit_block_array_t blocks = NULL;
     if (existing) {
-        regions = (block_array_t )*value;
+        blocks = (rb_ujit_block_array_t)*value;
    }
-    if (!rb_darray_append(&regions, *region)) {
+    if (!rb_darray_append(&blocks, new_block)) {
        rb_bug("ujit: failed to add method lookup dependency"); // TODO: we could bail out of compiling instead
    }
 
-    *value = (st_data_t)regions;
+    *value = (st_data_t)blocks;
     return ST_CONTINUE;
 }
 
-// Remember that the currently compiling region is only valid while cme and cc are valid
+// Remember that the currently compiling block is only valid while cme and cc are valid
 void
 assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, block_t *block)
 {
     RUBY_ASSERT(block != NULL);
     RUBY_ASSERT(block->dependencies.cc == 0 && block->dependencies.cme == 0);
-    struct compiled_region region = { .block = block };
-    st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)&region);
+    st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)block);
     block->dependencies.cme = (VALUE)cme;
-    st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)&region);
+    st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)block);
     block->dependencies.cc = (VALUE)cc;
 }
 
+static st_table *blocks_assuming_single_ractor_mode;
+
+// Can raise NoMemoryError.
+RBIMPL_ATTR_NODISCARD()
+bool
+assume_single_ractor_mode(block_t *block) {
+    if (rb_multi_ractor_p()) return false;
+
+    st_insert(blocks_assuming_single_ractor_mode, (st_data_t)block, 1);
+    return true;
+}
+
+static st_table *blocks_assuming_stable_global_constant_state;
+
+// Assume that the global constant state has not changed since call to this function.
+// Can raise NoMemoryError.
+RBIMPL_ATTR_NODISCARD()
+bool
+assume_stable_global_constant_state(block_t *block) {
+    st_insert(blocks_assuming_stable_global_constant_state, (st_data_t)block, 1);
+    return true;
+}
+
 static int
 ujit_root_mark_i(st_data_t k, st_data_t v, st_data_t ignore)
 {
@@ -253,11 +283,11 @@ rb_ujit_method_lookup_change(VALUE cme_or_cc)
     // Invalidate all regions that depend on the cme or cc
     st_data_t key = (st_data_t)cme_or_cc, image;
     if (st_delete(method_lookup_dependency, &key, &image)) {
-        block_array_t array = (void *)image;
-        struct compiled_region *elem;
+        rb_ujit_block_array_t array = (void *)image;
+        block_t **elem;
 
         rb_darray_foreach(array, i, elem) {
-            invalidate_block_version(elem->block);
+            invalidate_block_version(*elem);
         }
 
         rb_darray_free(array);
@@ -272,19 +302,9 @@ remove_method_lookup_dependency(VALUE cc_or_cme, block_t *block)
 {
     st_data_t key = (st_data_t)cc_or_cme, image;
     if (st_lookup(method_lookup_dependency, key, &image)) {
-        block_array_t array = (void *)image;
-        struct compiled_region *elem;
+        rb_ujit_block_array_t array = (void *)image;
 
-        // Find the block we are removing
-        rb_darray_foreach(array, i, elem) {
-            if (elem->block == block) {
-                // Remove the current element by moving the last element here.
-                // Order in the region array doesn't matter.
-                *elem = rb_darray_get(array, rb_darray_size(array) - 1);
-                rb_darray_pop_back(array);
-                break;
-            }
-        }
+        block_array_shuffle_remove(array, block);
 
         if (rb_darray_size(array) == 0) {
             st_delete(method_lookup_dependency, &key, NULL);
@@ -300,6 +320,19 @@ ujit_unlink_method_lookup_dependency(block_t *block)
     if (block->dependencies.cme) remove_method_lookup_dependency(block->dependencies.cme, block);
 }
 
+void
+ujit_block_assumptions_free(block_t *block)
+{
+    st_data_t as_st_data = (st_data_t)block;
+    if (blocks_assuming_stable_global_constant_state) {
+        st_delete(blocks_assuming_stable_global_constant_state, &as_st_data, NULL);
+    }
+
+    if (blocks_assuming_single_ractor_mode) {
+        st_delete(blocks_assuming_single_ractor_mode, &as_st_data, NULL);
+    }
+}
+
 void
 rb_ujit_compile_iseq(const rb_iseq_t *iseq)
 {
@@ -411,11 +444,28 @@ rb_ujit_bop_redefined(VALUE klass, const rb_method_entry_t *me, enum ruby_basic_
     //fprintf(stderr, "bop redefined\n");
 }
 
+static int
+block_invalidation_iterator(st_data_t key, st_data_t value, st_data_t data) {
+    block_t *block = (block_t *)key;
+    invalidate_block_version(block); // Thankfully, st_table supports deleting while iterating
+    return ST_CONTINUE;
+}
+
 /* Called when the constant state changes */
 void
 rb_ujit_constant_state_changed(void)
 {
-    //fprintf(stderr, "bop redefined\n");
+    if (blocks_assuming_stable_global_constant_state) {
+        st_foreach(blocks_assuming_stable_global_constant_state, block_invalidation_iterator, 0);
+    }
+}
+
+void
+rb_ujit_before_ractor_spawn(void)
+{
+    if (blocks_assuming_single_ractor_mode) {
+        st_foreach(blocks_assuming_single_ractor_mode, block_invalidation_iterator, 0);
+    }
 }
 
 #if HAVE_LIBCAPSTONE
#if HAVE_LIBCAPSTONE
|
||||
@ -651,6 +701,9 @@ rb_ujit_init(struct rb_ujit_options *options)
|
||||
rb_ujit_opts.call_threshold = 2;
|
||||
}
|
||||
|
||||
blocks_assuming_stable_global_constant_state = st_init_numtable();
|
||||
blocks_assuming_single_ractor_mode = st_init_numtable();
|
||||
|
||||
ujit_init_core();
|
||||
ujit_init_codegen();
|
||||
|
||||
|
@@ -10,6 +10,7 @@
 #include "stdint.h"
 #include "stdbool.h"
 #include "internal.h"
+#include "ruby/internal/attr/nodiscard.h"
 #include "vm_core.h"
 #include "vm_callinfo.h"
 #include "builtin.h"
@@ -32,9 +33,15 @@ int opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc);
 
 void check_cfunc_dispatch(VALUE receiver, struct rb_call_data *cd, void *callee, rb_callable_method_entry_t *compile_time_cme);
 bool cfunc_needs_frame(const rb_method_cfunc_t *cfunc);
 
 void assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, block_t* block);
+RBIMPL_ATTR_NODISCARD() bool assume_single_ractor_mode(block_t *block);
+RBIMPL_ATTR_NODISCARD() bool assume_stable_global_constant_state(block_t *block);
 
 // this function *must* return passed exit_pc
 const VALUE *rb_ujit_count_side_exit_op(const VALUE *exit_pc);
 
 void ujit_unlink_method_lookup_dependency(block_t *block);
+void ujit_block_assumptions_free(block_t *block);
 
 #endif // #ifndef UJIT_IFACE_H