De-dup identical callinfo objects

Previously every call to vm_ci_new (when the CI was not packable) would
result in a different callinfo being returned this meant that every
kwarg callsite had its own CI.

When calling, different CIs result in different CCs. These CIs and CCs
both end up persisted on the T_CLASS inside cc_tbl. So in an eval loop
this resulted in a memory leak of both types of object. This also likely
resulted in extra memory used, and extra time searching, in non-eval
cases.

For simplicity in this commit I always allocate a CI object inside
rb_vm_ci_lookup, but ideally we would lazily allocate it only when
needed. I hope to do that as a follow up in the future.
This commit is contained in:
John Hawthorn 2024-02-11 21:43:38 -08:00
parent 2a6917b463
commit 1c97abaaba
6 changed files with 129 additions and 11 deletions

1
gc.c
View File

@ -3745,6 +3745,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
case imemo_callinfo:
{
const struct rb_callinfo * ci = ((const struct rb_callinfo *)obj);
rb_vm_ci_free(ci);
if (ci->kwarg) {
((struct rb_callinfo_kwarg *)ci->kwarg)->references--;
if (ci->kwarg->references == 0) xfree((void *)ci->kwarg);

View File

@ -44,7 +44,7 @@ struct rb_classext_struct {
VALUE *iv_ptr;
struct rb_id_table *const_tbl;
struct rb_id_table *callable_m_tbl;
struct rb_id_table *cc_tbl; /* ID -> [[ci, cc1], cc2, ...] */
struct rb_id_table *cc_tbl; /* ID -> [[ci1, cc1], [ci2, cc2] ...] */
struct rb_id_table *cvc_tbl;
size_t superclass_depth;
VALUE *superclasses;

7
vm.c
View File

@ -2873,6 +2873,7 @@ rb_vm_update_references(void *ptr)
if (ptr) {
rb_vm_t *vm = ptr;
rb_gc_update_tbl_refs(vm->ci_table);
rb_gc_update_tbl_refs(vm->frozen_strings);
vm->mark_object_ary = rb_gc_location(vm->mark_object_ary);
vm->load_path = rb_gc_location(vm->load_path);
@ -3119,6 +3120,10 @@ ruby_vm_destruct(rb_vm_t *vm)
st_free_table(vm->loading_table);
vm->loading_table = 0;
}
if (vm->ci_table) {
st_free_table(vm->ci_table);
vm->ci_table = NULL;
}
if (vm->frozen_strings) {
st_free_table(vm->frozen_strings);
vm->frozen_strings = 0;
@ -3209,6 +3214,7 @@ vm_memsize(const void *ptr)
rb_vm_memsize_workqueue(&vm->workqueue) +
rb_st_memsize(vm->defined_module_hash) +
vm_memsize_at_exit_list(vm->at_exit) +
rb_st_memsize(vm->ci_table) +
rb_st_memsize(vm->frozen_strings) +
vm_memsize_builtin_function_table(vm->builtin_function_table) +
rb_id_table_memsize(vm->negative_cme_table) +
@ -4303,6 +4309,7 @@ Init_vm_objects(void)
/* initialize mark object array, hash */
vm->mark_object_ary = rb_ary_hidden_new(128);
vm->loading_table = st_init_strtable();
vm->ci_table = st_init_table(&vm_ci_hashtype);
vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 10000);
}

View File

@ -199,12 +199,13 @@ vm_ci_dump(const struct rb_callinfo *ci)
(((VALUE)(argc)) << CI_EMBED_ARGC_SHFT) | \
RUBY_FIXNUM_FLAG))
// vm_method.c
const struct rb_callinfo *rb_vm_ci_lookup(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg);
void rb_vm_ci_free(const struct rb_callinfo *);
static inline const struct rb_callinfo *
vm_ci_new_(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg, const char *file, int line)
{
if (kwarg) {
((struct rb_callinfo_kwarg *)kwarg)->references++;
}
if (USE_EMBED_CI && VM_CI_EMBEDDABLE_P(mid, flag, argc, kwarg)) {
RB_DEBUG_COUNTER_INC(ci_packed);
return vm_ci_new_id(mid, flag, argc, kwarg);
@ -213,13 +214,7 @@ vm_ci_new_(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinf
const bool debug = 0;
if (debug) ruby_debug_printf("%s:%d ", file, line);
// TODO: dedup
const struct rb_callinfo *ci = (const struct rb_callinfo *)
rb_imemo_new(imemo_callinfo,
(VALUE)mid,
(VALUE)flag,
(VALUE)argc,
(VALUE)kwarg);
const struct rb_callinfo *ci = rb_vm_ci_lookup(mid, flag, argc, kwarg);
if (debug) rp(ci);
if (kwarg) {
RB_DEBUG_COUNTER_INC(ci_kw);

View File

@ -752,6 +752,7 @@ typedef struct rb_vm_struct {
const struct rb_builtin_function *builtin_function_table;
st_table *ci_table;
struct rb_id_table *negative_cme_table;
st_table *overloaded_cme_table; // cme -> overloaded_cme

View File

@ -333,6 +333,120 @@ invalidate_all_refinement_cc(void *vstart, void *vend, size_t stride, void *data
return 0; // continue to iteration
}
static st_index_t
vm_ci_hash(VALUE v)
{
const struct rb_callinfo *ci = (const struct rb_callinfo *)v;
st_index_t h;
h = rb_hash_start(ci->mid);
h = rb_hash_uint(h, ci->flag);
h = rb_hash_uint(h, ci->argc);
if (ci->kwarg) {
for (int i = 0; i < ci->kwarg->keyword_len; i++) {
h = rb_hash_uint(h, ci->kwarg->keywords[i]);
}
}
return h;
}
static int
vm_ci_hash_cmp(VALUE v1, VALUE v2)
{
const struct rb_callinfo *ci1 = (const struct rb_callinfo *)v1;
const struct rb_callinfo *ci2 = (const struct rb_callinfo *)v2;
if (ci1->mid != ci2->mid) return 1;
if (ci1->flag != ci2->flag) return 1;
if (ci1->argc != ci2->argc) return 1;
if (ci1->kwarg != NULL) {
VM_ASSERT(ci2->kwarg != NULL); // implied by matching flags
if (ci1->kwarg->keyword_len != ci2->kwarg->keyword_len)
return 1;
for (int i = 0; i < ci1->kwarg->keyword_len; i++) {
if (ci1->kwarg->keywords[i] != ci2->kwarg->keywords[i]) {
return 1;
}
}
} else {
VM_ASSERT(ci2->kwarg == NULL); // implied by matching flags
}
return 0;
}
static const struct st_hash_type vm_ci_hashtype = {
vm_ci_hash_cmp,
vm_ci_hash
};
static int
ci_lookup_i(st_data_t *key, st_data_t *value, st_data_t data, int existing)
{
const struct rb_callinfo *ci = (const struct rb_callinfo *)*key;
st_data_t *ret = (st_data_t *)data;
if (existing) {
if (rb_objspace_garbage_object_p((VALUE)ci)) {
*ret = (st_data_t)NULL;
return ST_DELETE;
} else {
*ret = *key;
return ST_STOP;
}
}
else {
*key = *value = *ret = (st_data_t)ci;
return ST_CONTINUE;
}
}
const struct rb_callinfo *
rb_vm_ci_lookup(ID mid, unsigned int flag, unsigned int argc, const struct rb_callinfo_kwarg *kwarg)
{
rb_vm_t *vm = GET_VM();
const struct rb_callinfo *ci = NULL;
if (kwarg) {
((struct rb_callinfo_kwarg *)kwarg)->references++;
}
const struct rb_callinfo *new_ci = (const struct rb_callinfo *)
rb_imemo_new(
imemo_callinfo,
(VALUE)mid,
(VALUE)flag,
(VALUE)argc,
(VALUE)kwarg);
RB_VM_LOCK_ENTER();
{
st_table *ci_table = vm->ci_table;
VM_ASSERT(ci_table);
do {
st_update(ci_table, (st_data_t)new_ci, ci_lookup_i, (st_data_t)&ci);
} while (ci == NULL);
}
RB_VM_LOCK_LEAVE();
VM_ASSERT(ci);
VM_ASSERT(vm_ci_markable(ci));
return ci;
}
void
rb_vm_ci_free(const struct rb_callinfo *ci)
{
rb_vm_t *vm = GET_VM();
RB_VM_LOCK_ENTER();
{
st_data_t key = (st_data_t)ci;
st_delete(vm->ci_table, &key, NULL);
}
RB_VM_LOCK_LEAVE();
}
void
rb_clear_all_refinement_method_cache(void)
{