uJIT: Implement opt_getinlinecache

* ujit: implement opt_getinlinecache

Aggressively bet that writes to constants don't happen, and invalidate
all opt_getinlinecache blocks on any and all constant writes.

Use alignment padding on block_t to track this assumption. No change to
sizeof(block_t).

* Fix compile warnings when not RUBY_DEBUG
* Fix reversed condition
* Switch to st_table to keep track of assumptions

Co-authored-by: Aaron Patterson <aaron.patterson@gmail.com>
Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
Alan Wu 2021-02-25 15:10:38 -05:00
parent f93f3d6aa1
commit 57977ba30d
8 changed files with 165 additions and 44 deletions
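
In outline, the bet-and-invalidate scheme described above works as follows
(a condensed sketch using the helper names added in this diff; allocation
failure handling and the analogous single-ractor-mode table are omitted):

// Set of blocks whose generated code bakes in the current constants,
// keyed by block pointer (the value is unused).
static st_table *blocks_assuming_stable_global_constant_state;

// Compile time: record the bet when generating opt_getinlinecache.
bool
assume_stable_global_constant_state(block_t *block)
{
    st_insert(blocks_assuming_stable_global_constant_state, (st_data_t)block, 1);
    return true;
}

static int
block_invalidation_iterator(st_data_t key, st_data_t value, st_data_t data)
{
    invalidate_block_version((block_t *)key); // st_table allows deletion while iterating
    return ST_CONTINUE;
}

// Runtime: any constant write voids every recorded bet at once.
void
rb_ujit_constant_state_changed(void)
{
    st_foreach(blocks_assuming_stable_global_constant_state,
               block_invalidation_iterator, 0);
}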


@@ -10590,6 +10590,7 @@ ractor.$(OBJEXT): {$(VPATH)}thread.h
ractor.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h
ractor.$(OBJEXT): {$(VPATH)}thread_native.h
ractor.$(OBJEXT): {$(VPATH)}transient_heap.h
ractor.$(OBJEXT): {$(VPATH)}ujit.h
ractor.$(OBJEXT): {$(VPATH)}variable.h
ractor.$(OBJEXT): {$(VPATH)}vm_core.h
ractor.$(OBJEXT): {$(VPATH)}vm_debug.h


@@ -16,6 +16,7 @@
#include "variable.h"
#include "gc.h"
#include "transient_heap.h"
#include "ujit.h"
VALUE rb_cRactor;
@@ -1604,6 +1605,7 @@ ractor_create(rb_execution_context_t *ec, VALUE self, VALUE loc, VALUE name, VAL
r->verbose = cr->verbose;
r->debug = cr->debug;
rb_ujit_before_ractor_spawn();
rb_thread_create_ractor(r, args, block);
RB_GC_GUARD(rv);

ujit.h

@@ -56,5 +56,6 @@ void rb_ujit_constant_state_changed(void);
void rb_ujit_iseq_mark(const struct rb_iseq_constant_body *body);
void rb_ujit_iseq_update_references(const struct rb_iseq_constant_body *body);
void rb_ujit_iseq_free(const struct rb_iseq_constant_body *body);
void rb_ujit_before_ractor_spawn(void);
#endif // #ifndef UJIT_H


@@ -60,19 +60,20 @@ jit_get_arg(jitstate_t* jit, size_t arg_idx)
return *(jit->pc + arg_idx + 1);
}
// Load a pointer to a GC'd object into a register and keep track of the reference
// Load a VALUE into a register and keep track of the reference if it is on the GC heap.
static void
jit_mov_gc_ptr(jitstate_t* jit, codeblock_t* cb, x86opnd_t reg, VALUE ptr)
{
RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
RUBY_ASSERT(!SPECIAL_CONST_P(ptr));
mov(cb, reg, const_ptr_opnd((void*)ptr));
// The pointer immediate is encoded as the last part of the mov written out.
uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
rb_bug("allocation failed");
if (!SPECIAL_CONST_P(ptr)) {
if (!rb_darray_append(&jit->block->gc_object_offsets, ptr_offset)) {
rb_bug("allocation failed");
}
}
}
@@ -252,12 +253,14 @@ ujit_gen_block(ctx_t* ctx, block_t* block)
break;
}
#if RUBY_DEBUG
// Accumulate stats about instructions executed
if (rb_ujit_opts.gen_stats) {
// Count instructions executed by the JIT
mov(cb, REG0, const_ptr_opnd((void *)&rb_ujit_exec_insns_count));
add(cb, mem_opnd(64, REG0, 0), imm_opnd(1));
}
#endif
//fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
//print_str(cb, insn_name(opcode));
@@ -1115,6 +1118,7 @@ gen_oswb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_c
// Pointer to the klass field of the receiver &(recv->klass)
x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
// FIXME: This leaks when st_insert raises NoMemoryError
assume_method_lookup_stable(cd->cc, cme, jit->block);
// Bail if receiver class is different from compile-time call cache class
@@ -1570,6 +1574,48 @@ gen_leave(jitstate_t* jit, ctx_t* ctx)
return true;
}
RUBY_EXTERN rb_serial_t ruby_vm_global_constant_state;
static bool
gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx)
{
VALUE jump_offset = jit_get_arg(jit, 0);
VALUE const_cache_as_value = jit_get_arg(jit, 1);
IC ic = (IC)const_cache_as_value;
// See vm_ic_hit_p().
struct iseq_inline_constant_cache_entry *ice = ic->entry;
if (!ice) return false; // cache not filled
if (ice->ic_serial != ruby_vm_global_constant_state) {
// Cache miss at compile time.
return false;
}
if (ice->ic_cref) {
// Only compile for caches that don't care about lexical scope.
return false;
}
// Optimize for single ractor mode.
// FIXME: This leaks when st_insert raises NoMemoryError
if (!assume_single_ractor_mode(jit->block)) return false;
// Invalidate output code on any and all constant writes
// FIXME: This leaks when st_insert raises NoMemoryError
if (!assume_stable_global_constant_state(jit->block)) return false;
x86opnd_t stack_top = ctx_stack_push(ctx, T_NONE);
jit_mov_gc_ptr(jit, cb, REG0, ice->value);
mov(cb, stack_top, REG0);
// Jump over the code for filling the cache
uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
gen_direct_jump(
ctx,
(blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
);
return true;
}
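
For context, the compile-time bail-outs above mirror the interpreter's own
cache test. A simplified sketch of that condition (modeled on vm_ic_hit_p(),
which the comment above references; the real check has more cases):

// A constant cache entry hits when it exists, was filled under the
// current global constant serial, and doesn't depend on lexical scope.
static bool
constant_cache_hit_sketch(const struct iseq_inline_constant_cache_entry *ice)
{
    return ice != NULL
        && ice->ic_serial == ruby_vm_global_constant_state
        && ice->ic_cref == NULL;
}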
void ujit_reg_op(int opcode, codegen_fn gen_fn, bool is_branch)
{
// Check that the op wasn't previously registered
@@ -1620,6 +1666,9 @@ ujit_init_codegen(void)
ujit_reg_op(BIN(opt_and), gen_opt_and, false);
ujit_reg_op(BIN(opt_minus), gen_opt_minus, false);
ujit_reg_op(BIN(opt_plus), gen_opt_plus, false);
// Map branch instruction opcodes to codegen functions
ujit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache, true);
ujit_reg_op(BIN(branchif), gen_branchif, true);
ujit_reg_op(BIN(branchunless), gen_branchunless, true);
ujit_reg_op(BIN(jump), gen_jump, true);


@@ -175,8 +175,10 @@ add_block_version(blockid_t blockid, block_t* block)
rb_bug("allocation failed");
}
#if RUBY_DEBUG
// First block compiled for this iseq
rb_compiled_iseq_count++;
#endif
}
block_t *first_version = get_first_version(iseq, blockid.idx);
@@ -199,7 +201,7 @@ add_block_version(blockid_t blockid, block_t* block)
RB_OBJ_WRITTEN(iseq, Qundef, block->dependencies.cc);
RB_OBJ_WRITTEN(iseq, Qundef, block->dependencies.cme);
// Run write barrier for all objects in generated code.
// Run write barriers for all objects in generated code.
uint32_t *offset_element;
rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) {
uint32_t offset_to_value = *offset_element;
@@ -601,9 +603,12 @@ void
ujit_free_block(block_t *block)
{
ujit_unlink_method_lookup_dependency(block);
ujit_block_assumptions_free(block);
rb_darray_free(block->incoming);
free(block);
rb_darray_free(block->gc_object_offsets);
free(block);
}
// Invalidate one specific block version


@@ -107,9 +107,6 @@ typedef struct ujit_block_version
// Bytecode sequence (iseq, idx) this is a version of
blockid_t blockid;
// Index one past the last instruction in the iseq
uint32_t end_idx;
// Context at the start of the block
ctx_t ctx;
@@ -120,6 +117,9 @@ typedef struct ujit_block_version
// List of incoming branch indices
int32_array_t incoming;
// Offsets for GC managed objects in the mainline code block
int32_array_t gc_object_offsets;
// Next block version for this blockid (singly-linked list)
struct ujit_block_version *next;
@@ -132,6 +132,9 @@ typedef struct ujit_block_version
VALUE cme;
VALUE iseq;
} dependencies;
// Index one past the last instruction in the iseq
uint32_t end_idx;
} block_t;
// Context object methods
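
The commit message's note that sizeof(block_t) is unchanged comes down to
alignment padding: a lone uint32_t at the tail of a pointer-aligned struct
can occupy padding that was already there. A toy illustration of the
argument (not the real block_t layout; assumes a typical LP64 ABI):

#include <stdint.h>

struct without_tail_field {
    void *ptrs[3];
    uint32_t idx;      // 4 bytes used; 4 bytes of tail padding follow
};
struct with_tail_field {
    void *ptrs[3];
    uint32_t idx;
    uint32_t end_idx;  // occupies the padding instead of growing the struct
};
_Static_assert(sizeof(struct without_tail_field) == sizeof(struct with_tail_field),
               "the trailing uint32_t reuses alignment padding");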


@@ -24,10 +24,12 @@ VALUE cUjitBlock;
VALUE cUjitDisasm;
VALUE cUjitDisasmInsn;
#if RUBY_DEBUG
static int64_t vm_insns_count = 0;
int64_t rb_ujit_exec_insns_count = 0;
static int64_t exit_op_count[VM_INSTRUCTION_SIZE] = { 0 };
int64_t rb_compiled_iseq_count = 0;
#endif
// Machine code blocks (executable memory)
extern codeblock_t *cb;
@@ -45,7 +47,7 @@ static const rb_data_type_t ujit_block_type = {
};
// Write the uJIT entry point pre-call bytes
void
cb_write_pre_call_bytes(codeblock_t* cb)
{
for (size_t i = 0; i < sizeof(ujit_with_ec_pre_call_bytes); ++i)
@@ -53,7 +55,7 @@ cb_write_pre_call_bytes(codeblock_t* cb)
}
// Write the uJIT exit post-call bytes
void
cb_write_post_call_bytes(codeblock_t* cb)
{
for (size_t i = 0; i < sizeof(ujit_with_ec_post_call_bytes); ++i)
@@ -129,46 +131,74 @@ struct ujit_root_struct {
int unused; // empty structs are not legal in C99
};
// Map cme_or_cc => [[iseq, offset]]. An entry in the map means compiled code at iseq[offset]
// is only valid when cme_or_cc is valid
static void
block_array_shuffle_remove(rb_ujit_block_array_t blocks, block_t *to_remove) {
block_t **elem;
rb_darray_foreach(blocks, i, elem) {
if (*elem == to_remove) {
// Remove the current element by moving the last element here, then popping.
*elem = rb_darray_get(blocks, rb_darray_size(blocks) - 1);
rb_darray_pop_back(blocks);
break;
}
}
}
// Map cme_or_cc => [block]
static st_table *method_lookup_dependency;
struct compiled_region {
block_t *block;
};
typedef rb_darray(struct compiled_region) block_array_t;
static int
add_lookup_dependency_i(st_data_t *key, st_data_t *value, st_data_t data, int existing)
{
struct compiled_region *region = (struct compiled_region *)data;
block_t *new_block = (block_t *)data;
block_array_t regions = NULL;
rb_ujit_block_array_t blocks = NULL;
if (existing) {
regions = (block_array_t )*value;
blocks = (rb_ujit_block_array_t)*value;
}
if (!rb_darray_append(&regions, *region)) {
if (!rb_darray_append(&blocks, new_block)) {
rb_bug("ujit: failed to add method lookup dependency"); // TODO: we could bail out of compiling instead
}
*value = (st_data_t)regions;
*value = (st_data_t)blocks;
return ST_CONTINUE;
}
// Remember that the currently compiling region is only valid while cme and cc are valid
// Remember that the currently compiling block is only valid while cme and cc are valid
void
assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, block_t *block)
{
RUBY_ASSERT(block != NULL);
RUBY_ASSERT(block->dependencies.cc == 0 && block->dependencies.cme == 0);
struct compiled_region region = { .block = block };
st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)&region);
st_update(method_lookup_dependency, (st_data_t)cme, add_lookup_dependency_i, (st_data_t)block);
block->dependencies.cme = (VALUE)cme;
st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)&region);
st_update(method_lookup_dependency, (st_data_t)cc, add_lookup_dependency_i, (st_data_t)block);
block->dependencies.cc = (VALUE)cc;
}
static st_table *blocks_assuming_single_ractor_mode;
// Can raise NoMemoryError.
RBIMPL_ATTR_NODISCARD()
bool
assume_single_ractor_mode(block_t *block) {
if (rb_multi_ractor_p()) return false;
st_insert(blocks_assuming_single_ractor_mode, (st_data_t)block, 1);
return true;
}
static st_table *blocks_assuming_stable_global_constant_state;
// Assume that the global constant state does not change after the call to this function.
// Can raise NoMemoryError.
RBIMPL_ATTR_NODISCARD()
bool
assume_stable_global_constant_state(block_t *block) {
st_insert(blocks_assuming_stable_global_constant_state, (st_data_t)block, 1);
return true;
}
static int
ujit_root_mark_i(st_data_t k, st_data_t v, st_data_t ignore)
{
@@ -253,11 +283,11 @@ rb_ujit_method_lookup_change(VALUE cme_or_cc)
// Invalidate all regions that depend on the cme or cc
st_data_t key = (st_data_t)cme_or_cc, image;
if (st_delete(method_lookup_dependency, &key, &image)) {
block_array_t array = (void *)image;
struct compiled_region *elem;
rb_ujit_block_array_t array = (void *)image;
block_t **elem;
rb_darray_foreach(array, i, elem) {
invalidate_block_version(elem->block);
invalidate_block_version(*elem);
}
rb_darray_free(array);
@@ -272,19 +302,9 @@ remove_method_lookup_dependency(VALUE cc_or_cme, block_t *block)
{
st_data_t key = (st_data_t)cc_or_cme, image;
if (st_lookup(method_lookup_dependency, key, &image)) {
block_array_t array = (void *)image;
struct compiled_region *elem;
rb_ujit_block_array_t array = (void *)image;
// Find the block we are removing
rb_darray_foreach(array, i, elem) {
if (elem->block == block) {
// Remove the current element by moving the last element here.
// Order in the region array doesn't matter.
*elem = rb_darray_get(array, rb_darray_size(array) - 1);
rb_darray_pop_back(array);
break;
}
}
block_array_shuffle_remove(array, block);
if (rb_darray_size(array) == 0) {
st_delete(method_lookup_dependency, &key, NULL);
@@ -300,6 +320,19 @@ ujit_unlink_method_lookup_dependency(block_t *block)
if (block->dependencies.cme) remove_method_lookup_dependency(block->dependencies.cme, block);
}
void
ujit_block_assumptions_free(block_t *block)
{
st_data_t as_st_data = (st_data_t)block;
if (blocks_assuming_stable_global_constant_state) {
st_delete(blocks_assuming_stable_global_constant_state, &as_st_data, NULL);
}
if (blocks_assuming_single_ractor_mode) {
st_delete(blocks_assuming_single_ractor_mode, &as_st_data, NULL);
}
}
void
rb_ujit_compile_iseq(const rb_iseq_t *iseq)
{
@@ -411,11 +444,28 @@ rb_ujit_bop_redefined(VALUE klass, const rb_method_entry_t *me, enum ruby_basic_
//fprintf(stderr, "bop redefined\n");
}
static int
block_invalidation_iterator(st_data_t key, st_data_t value, st_data_t data) {
block_t *block = (block_t *)key;
invalidate_block_version(block); // Thankfully, st_table supports deleting while iterating
return ST_CONTINUE;
}
/* Called when the constant state changes */
void
rb_ujit_constant_state_changed(void)
{
//fprintf(stderr, "constant state changed\n");
if (blocks_assuming_stable_global_constant_state) {
st_foreach(blocks_assuming_stable_global_constant_state, block_invalidation_iterator, 0);
}
}
void
rb_ujit_before_ractor_spawn(void)
{
if (blocks_assuming_single_ractor_mode) {
st_foreach(blocks_assuming_single_ractor_mode, block_invalidation_iterator, 0);
}
}
#if HAVE_LIBCAPSTONE
@@ -651,6 +701,9 @@ rb_ujit_init(struct rb_ujit_options *options)
rb_ujit_opts.call_threshold = 2;
}
blocks_assuming_stable_global_constant_state = st_init_numtable();
blocks_assuming_single_ractor_mode = st_init_numtable();
ujit_init_core();
ujit_init_codegen();


@@ -10,6 +10,7 @@
#include "stdint.h"
#include "stdbool.h"
#include "internal.h"
#include "ruby/internal/attr/nodiscard.h"
#include "vm_core.h"
#include "vm_callinfo.h"
#include "builtin.h"
@@ -32,9 +33,15 @@ int opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc);
void check_cfunc_dispatch(VALUE receiver, struct rb_call_data *cd, void *callee, rb_callable_method_entry_t *compile_time_cme);
bool cfunc_needs_frame(const rb_method_cfunc_t *cfunc);
void assume_method_lookup_stable(const struct rb_callcache *cc, const rb_callable_method_entry_t *cme, block_t* block);
RBIMPL_ATTR_NODISCARD() bool assume_single_ractor_mode(block_t *block);
RBIMPL_ATTR_NODISCARD() bool assume_stable_global_constant_state(block_t *block);
// this function *must* return the passed exit_pc
const VALUE *rb_ujit_count_side_exit_op(const VALUE *exit_pc);
void ujit_unlink_method_lookup_dependency(block_t *block);
void ujit_block_assumptions_free(block_t *block);
#endif // #ifndef UJIT_IFACE_H