Resize arrays in rb_ary_freeze and use it for freezing arrays

While working on a separate issue we found that in some cases
`ary_heap_realloc` was being called on frozen arrays. To fix this, this
change does the following:

1) Updates `rb_ary_freeze` to assert the type is an array, return if
already frozen, and shrink the capacity if it is not embedded, shared
or a shared root.
2) Replaces `rb_obj_freeze` with `rb_ary_freeze` when the object is
always an array.
3) In `ary_shrink_capa`, ensure the new capa returned by `ary_heap_realloc`
is set with `ARY_SET_CAPA`. Previously the change in capa was not set.
4) Adds an assertion to `ary_heap_realloc` that the array is not frozen.

Some of this work was originally done in
https://github.com/ruby/ruby/pull/2640, referencing this issue
https://bugs.ruby-lang.org/issues/16291. There didn't appear to be any
objections to that PR; it appears to have simply lost traction.

The original PR made changes to arrays and strings at the same time,
this PR only does arrays. Also it was old enough that rather than revive
that branch I've made a new one. I added Lourens as co-author in addition
to Aaron, who helped me with this patch.

The original PR made this change for performance reasons, and while
that's still true for this PR, the goal of this PR is to avoid
calling `ary_heap_realloc` on frozen arrays. The capacity should be
shrunk _before_ the array is frozen, not after.

Co-authored-by: Aaron Patterson <tenderlove@ruby-lang.org>
Co-Authored-By: methodmissing <lourens@methodmissing.com>
This commit is contained in:
eileencodes 2024-06-18 14:52:18 -04:00 committed by Aaron Patterson
parent cee62c6738
commit d25b74b32c
8 changed files with 29 additions and 16 deletions

19
array.c
View File

@ -370,6 +370,7 @@ ary_heap_free(VALUE ary)
static size_t
ary_heap_realloc(VALUE ary, size_t new_capa)
{
RUBY_ASSERT(!OBJ_FROZEN(ary));
SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, new_capa, ARY_HEAP_CAPA(ary));
ary_verify(ary);
@ -441,7 +442,10 @@ ary_shrink_capa(VALUE ary)
long old_capa = ARY_HEAP_CAPA(ary);
RUBY_ASSERT(!ARY_SHARED_P(ary));
RUBY_ASSERT(old_capa >= capacity);
if (old_capa > capacity) ary_heap_realloc(ary, capacity);
if (old_capa > capacity) {
size_t new_capa = ary_heap_realloc(ary, capacity);
ARY_SET_CAPA(ary, new_capa);
}
ary_verify(ary);
}
@ -639,6 +643,14 @@ ary_ensure_room_for_push(VALUE ary, long add_len)
/*
 * Freezes +ary+ and returns it.
 *
 * An array that is already frozen is returned unchanged. Otherwise, when
 * the array is not embedded, not shared and not a shared root,
 * ary_shrink_capa() is called on it before it is frozen. The order
 * matters: ary_shrink_capa() may call ary_heap_realloc(), which asserts
 * that the array is not frozen.
 */
VALUE
rb_ary_freeze(VALUE ary)
{
/* Callers must only pass arrays; checked in assertion-enabled builds. */
RUBY_ASSERT(RB_TYPE_P(ary, T_ARRAY));
/* Nothing to do (and no reallocation allowed) once frozen. */
if (OBJ_FROZEN(ary)) return ary;
/* Embedded, shared and shared-root arrays are left at their current capacity. */
if (!ARY_EMBED_P(ary) && !ARY_SHARED_P(ary) && !ARY_SHARED_ROOT_P(ary)) {
ary_shrink_capa(ary);
}
/* Freeze last, after any capacity change has been made. */
return rb_obj_freeze(ary);
}
@ -889,7 +901,7 @@ rb_setup_fake_ary(struct RArray *fake_ary, const VALUE *list, long len, bool fre
ARY_SET_PTR(ary, list);
ARY_SET_HEAP_LEN(ary, len);
ARY_SET_CAPA(ary, len);
if (freeze) OBJ_FREEZE(ary);
if (freeze) rb_ary_freeze(ary);
return ary;
}
@ -6458,7 +6470,7 @@ rb_ary_flatten_bang(int argc, VALUE *argv, VALUE ary)
if (result == ary) {
return Qnil;
}
if (!(mod = ARY_EMBED_P(result))) rb_obj_freeze(result);
if (!(mod = ARY_EMBED_P(result))) rb_ary_freeze(result);
rb_ary_replace(ary, result);
if (mod) ARY_SET_EMBED_LEN(result, 0);
@ -8754,6 +8766,7 @@ Init_Array(void)
rb_define_method(rb_cArray, "one?", rb_ary_one_p, -1);
rb_define_method(rb_cArray, "dig", rb_ary_dig, -1);
rb_define_method(rb_cArray, "sum", rb_ary_sum, -1);
rb_define_method(rb_cArray, "freeze", rb_ary_freeze, 0);
rb_define_method(rb_cArray, "deconstruct", rb_ary_deconstruct, 0);
}

2
ast.c
View File

@ -769,7 +769,7 @@ ast_node_all_tokens(rb_execution_context_t *ec, VALUE self)
token = rb_ary_new_from_args(4, INT2FIX(parser_token->id), ID2SYM(rb_intern(parser_token->type_name)), str, loc);
rb_ary_push(all_tokens, token);
}
rb_obj_freeze(all_tokens);
rb_ary_freeze(all_tokens);
return all_tokens;
}

View File

@ -4840,7 +4840,7 @@ static_literal_value(const NODE *node, rb_iseq_t *iseq)
if (ISEQ_COMPILE_DATA(iseq)->option->debug_frozen_string_literal || RTEST(ruby_debug)) {
VALUE debug_info = rb_ary_new_from_args(2, rb_iseq_path(iseq), INT2FIX((int)nd_line(node)));
VALUE lit = rb_str_dup(get_string_value(node));
rb_ivar_set(lit, id_debug_created_info, rb_obj_freeze(debug_info));
rb_ivar_set(lit, id_debug_created_info, rb_ary_freeze(debug_info));
return rb_str_freeze(lit);
}
else {
@ -10752,7 +10752,7 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
if (ISEQ_COMPILE_DATA(iseq)->option->debug_frozen_string_literal || RTEST(ruby_debug)) {
VALUE debug_info = rb_ary_new_from_args(2, rb_iseq_path(iseq), INT2FIX(line));
lit = rb_str_dup(lit);
rb_ivar_set(lit, id_debug_created_info, rb_obj_freeze(debug_info));
rb_ivar_set(lit, id_debug_created_info, rb_ary_freeze(debug_info));
lit = rb_str_freeze(lit);
}
ADD_INSN1(ret, node, putobject, lit);
@ -11321,7 +11321,7 @@ rb_insns_name_array(void)
for (i = 0; i < VM_INSTRUCTION_SIZE; i++) {
rb_ary_push(ary, rb_fstring_cstr(insn_name(i)));
}
return rb_obj_freeze(ary);
return rb_ary_freeze(ary);
}
static LABEL *
@ -13709,7 +13709,7 @@ ibf_load_object_array(const struct ibf_load *load, const struct ibf_object_heade
rb_ary_push(ary, ibf_load_object(load, index));
}
if (header->frozen) rb_obj_freeze(ary);
if (header->frozen) rb_ary_freeze(ary);
return ary;
}

View File

@ -3181,7 +3181,7 @@ enum_chain_initialize(VALUE obj, VALUE enums)
if (!ptr) rb_raise(rb_eArgError, "unallocated chain");
ptr->enums = rb_obj_freeze(enums);
ptr->enums = rb_ary_freeze(enums);
ptr->pos = -1;
return obj;
@ -3509,7 +3509,7 @@ enum_product_initialize(int argc, VALUE *argv, VALUE obj)
if (!ptr) rb_raise(rb_eArgError, "unallocated product");
ptr->enums = rb_obj_freeze(enums);
ptr->enums = rb_ary_freeze(enums);
return obj;
}

View File

@ -57,7 +57,7 @@ static VALUE sym_hour, sym_min, sym_sec, sym_sec_fraction, sym_zone;
#define f_add3(x,y,z) f_add(f_add(x, y), z)
#define f_sub3(x,y,z) f_sub(f_sub(x, y), z)
#define f_frozen_ary(...) rb_obj_freeze(rb_ary_new3(__VA_ARGS__))
#define f_frozen_ary(...) rb_ary_freeze(rb_ary_new3(__VA_ARGS__))
static VALUE date_initialize(int argc, VALUE *argv, VALUE self);
static VALUE datetime_initialize(int argc, VALUE *argv, VALUE self);
@ -9466,7 +9466,7 @@ mk_ary_of_str(long len, const char *a[])
}
rb_ary_push(o, e);
}
rb_obj_freeze(o);
rb_ary_freeze(o);
return o;
}

2
iseq.c
View File

@ -521,7 +521,7 @@ rb_iseq_pathobj_new(VALUE path, VALUE realpath)
else {
if (!NIL_P(realpath)) realpath = rb_fstring(realpath);
pathobj = rb_ary_new_from_args(2, rb_fstring(path), realpath);
rb_obj_freeze(pathobj);
rb_ary_freeze(pathobj);
}
return pathobj;
}

2
load.c
View File

@ -104,7 +104,7 @@ rb_construct_expanded_load_path(rb_vm_t *vm, enum expand_type type, int *has_rel
if (NIL_P(expanded_path)) expanded_path = as_str;
rb_ary_push(ary, rb_fstring(expanded_path));
}
rb_obj_freeze(ary);
rb_ary_freeze(ary);
vm->expanded_load_path = ary;
rb_ary_replace(vm->load_path_snapshot, vm->load_path);
}

View File

@ -299,7 +299,7 @@ parse_static_literal_string(rb_iseq_t *iseq, const pm_scope_node_t *scope_node,
int line_number = pm_node_line_number(scope_node->parser, node);
VALUE debug_info = rb_ary_new_from_args(2, rb_iseq_path(iseq), INT2FIX(line_number));
value = rb_str_dup(value);
rb_ivar_set(value, id_debug_created_info, rb_obj_freeze(debug_info));
rb_ivar_set(value, id_debug_created_info, rb_ary_freeze(debug_info));
rb_str_freeze(value);
}
@ -693,7 +693,7 @@ pm_static_literal_string(rb_iseq_t *iseq, VALUE string, int line_number)
{
if (ISEQ_COMPILE_DATA(iseq)->option->debug_frozen_string_literal || RTEST(ruby_debug)) {
VALUE debug_info = rb_ary_new_from_args(2, rb_iseq_path(iseq), INT2FIX(line_number));
rb_ivar_set(string, id_debug_created_info, rb_obj_freeze(debug_info));
rb_ivar_set(string, id_debug_created_info, rb_ary_freeze(debug_info));
return rb_str_freeze(string);
}
else {