[Feature #18634] Implement Arrays on Variable Width Allocation

This commit implements arrays on Variable Width Allocation. This allows
longer arrays to be embedded (i.e. contents directly follow the object
header) which improves performance through better cache locality.
This commit is contained in:
Peter Zhu 2022-03-15 09:34:07 -04:00
parent 414ad77142
commit a51f30c671
Notes: git 2022-03-22 22:43:04 +09:00
3 changed files with 176 additions and 38 deletions

191
array.c
View File

@ -139,7 +139,7 @@ should_not_be_shared_and_embedded(VALUE ary)
} \
} while (0)
#define ARY_CAPA(ary) (ARY_EMBED_P(ary) ? RARRAY_EMBED_LEN_MAX : \
#define ARY_CAPA(ary) (ARY_EMBED_P(ary) ? ary_embed_capa(ary) : \
ARY_SHARED_ROOT_P(ary) ? RARRAY_LEN(ary) : ARY_HEAP_CAPA(ary))
#define ARY_SET_CAPA(ary, n) do { \
assert(!ARY_EMBED_P(ary)); \
@ -157,7 +157,7 @@ should_not_be_shared_and_embedded(VALUE ary)
assert(ARY_SHARED_ROOT_P(_value_)); \
RB_OBJ_WRITE(_ary_, &RARRAY(_ary_)->as.heap.aux.shared_root, _value_); \
} while (0)
#define RARRAY_SHARED_ROOT_FLAG FL_USER5
#define RARRAY_SHARED_ROOT_FLAG FL_USER12
#define ARY_SHARED_ROOT_P(ary) (assert(should_be_T_ARRAY((VALUE)(ary))), \
FL_TEST_RAW((ary), RARRAY_SHARED_ROOT_FLAG))
#define ARY_SHARED_ROOT_REFCNT(ary) \
@ -184,6 +184,34 @@ ARY_SET(VALUE a, long i, VALUE v)
}
#undef RARRAY_ASET
/*
 * Returns how many VALUE elements fit in the embedded storage of `ary`.
 *
 * With USE_RVARGC the answer is derived from the size of the GC slot that
 * holds `ary` (as reported by rb_gc_obj_slot_size): the bytes that remain
 * after the offset of `as.ary` are divided by sizeof(VALUE).  Without
 * USE_RVARGC the capacity is the constant RARRAY_EMBED_LEN_MAX.
 */
static long
ary_embed_capa(VALUE ary)
{
#if USE_RVARGC
    const size_t header_bytes = offsetof(struct RArray, as.ary);
    const size_t payload_bytes = rb_gc_obj_slot_size(ary) - header_bytes;

    /* The space after the header must hold a whole number of VALUEs. */
    assert(payload_bytes % sizeof(VALUE) == 0);

    return payload_bytes / sizeof(VALUE);
#else
    return RARRAY_EMBED_LEN_MAX;
#endif
}
/*
 * Returns the number of bytes occupied by a struct RArray whose embedded
 * element area (`as.ary`) holds `capa` VALUEs: the offset of `as.ary`
 * within the struct plus `capa` * sizeof(VALUE).
 */
static size_t
ary_embed_size(long capa)
{
    const size_t header_bytes = offsetof(struct RArray, as.ary);
    const size_t element_bytes = sizeof(VALUE) * capa;

    return header_bytes + element_bytes;
}
/*
 * Tells whether an array with room for `capa` elements can be embedded.
 *
 * With USE_RVARGC this asks rb_gc_size_allocatable_p whether the byte size
 * computed by ary_embed_size(capa) is allocatable; otherwise it checks that
 * `capa` does not exceed RARRAY_EMBED_LEN_MAX.
 */
static bool
ary_embeddable_p(long capa)
{
#if USE_RVARGC
    const size_t needed_bytes = ary_embed_size(capa);

    return rb_gc_size_allocatable_p(needed_bytes);
#else
    return !(capa > RARRAY_EMBED_LEN_MAX);
#endif
}
#if ARRAY_DEBUG
#define ary_verify(ary) ary_verify_(ary, __FILE__, __LINE__)
@ -205,7 +233,7 @@ ary_verify_(VALUE ary, const char *file, int line)
else if (ARY_EMBED_P(ary)) {
assert(!RARRAY_TRANSIENT_P(ary));
assert(!ARY_SHARED_P(ary));
assert(RARRAY_LEN(ary) <= RARRAY_EMBED_LEN_MAX);
assert(RARRAY_LEN(ary) <= ary_embed_capa(ary));
}
else {
#if 1
@ -447,7 +475,7 @@ ary_resize_capa(VALUE ary, long capacity)
assert(!OBJ_FROZEN(ary));
assert(!ARY_SHARED_P(ary));
if (capacity > RARRAY_EMBED_LEN_MAX) {
if (capacity > ary_embed_capa(ary)) {
size_t new_capa = capacity;
if (ARY_EMBED_P(ary)) {
long len = ARY_EMBED_LEN(ary);
@ -573,7 +601,7 @@ rb_ary_cancel_sharing(VALUE ary)
ary_verify(shared_root);
if (len <= RARRAY_EMBED_LEN_MAX) {
if (len <= ary_embed_capa(ary)) {
const VALUE *ptr = ARY_HEAP_PTR(ary);
FL_UNSET_SHARED(ary);
FL_SET_EMBED(ary);
@ -623,7 +651,7 @@ ary_ensure_room_for_push(VALUE ary, long add_len)
rb_raise(rb_eIndexError, "index %ld too big", new_len);
}
if (ARY_SHARED_P(ary)) {
if (new_len > RARRAY_EMBED_LEN_MAX) {
if (new_len > ary_embed_capa(ary)) {
VALUE shared_root = ARY_SHARED_ROOT(ary);
if (ARY_SHARED_ROOT_OCCUPIED(shared_root)) {
if (ARY_HEAP_PTR(ary) - RARRAY_CONST_PTR_TRANSIENT(shared_root) + new_len <= RARRAY_LEN(shared_root)) {
@ -699,9 +727,16 @@ rb_ary_shared_with_p(VALUE ary1, VALUE ary2)
}
static VALUE
ary_alloc(VALUE klass)
ary_alloc_embed(VALUE klass, long capa)
{
NEWOBJ_OF(ary, struct RArray, klass, T_ARRAY | RARRAY_EMBED_FLAG | (RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0));
size_t size = ary_embed_size(capa);
assert(rb_gc_size_allocatable_p(size));
#if !USE_RVARGC
assert(size <= sizeof(struct RArray));
#endif
RVARGC_NEWOBJ_OF(ary, struct RArray, klass,
T_ARRAY | RARRAY_EMBED_FLAG | (RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0),
size);
/* Created array is:
* FL_SET_EMBED((VALUE)ary);
* ARY_SET_EMBED_LEN((VALUE)ary, 0);
@ -709,11 +744,20 @@ ary_alloc(VALUE klass)
return (VALUE)ary;
}
/*
 * Allocates a new T_ARRAY object of class `klass` with a requested size of
 * sizeof(struct RArray).
 *
 * RARRAY_EMBED_FLAG is not among the flags passed to the allocator.
 * FL_WB_PROTECTED is added only when RGENGC_WB_PROTECTED_ARRAY is enabled.
 * This function does not itself store a pointer, length or capacity in the
 * new object.
 */
static VALUE
ary_alloc_heap(VALUE klass)
{
    const VALUE wb_flag = RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0;

    RVARGC_NEWOBJ_OF(obj, struct RArray, klass, T_ARRAY | wb_flag, sizeof(struct RArray));

    return (VALUE)obj;
}
static VALUE
empty_ary_alloc(VALUE klass)
{
RUBY_DTRACE_CREATE_HOOK(ARRAY, 0);
return ary_alloc(klass);
return ary_alloc_embed(klass, 0);
}
static VALUE
@ -730,10 +774,14 @@ ary_new(VALUE klass, long capa)
RUBY_DTRACE_CREATE_HOOK(ARRAY, capa);
ary = ary_alloc(klass);
if (capa > RARRAY_EMBED_LEN_MAX) {
if (ary_embeddable_p(capa)) {
ary = ary_alloc_embed(klass, capa);
}
else {
ary = ary_alloc_heap(klass);
assert(!ARY_EMBED_P(ary));
ptr = ary_heap_alloc(ary, capa);
FL_UNSET_EMBED(ary);
ARY_SET_PTR(ary, ptr);
ARY_SET_CAPA(ary, capa);
ARY_SET_HEAP_LEN(ary, 0);
@ -751,7 +799,7 @@ rb_ary_new_capa(long capa)
VALUE
rb_ary_new(void)
{
return rb_ary_new2(RARRAY_EMBED_LEN_MAX);
return rb_ary_new_capa(0);
}
VALUE
@ -794,9 +842,16 @@ rb_ary_new_from_values(long n, const VALUE *elts)
}
static VALUE
ec_ary_alloc(rb_execution_context_t *ec, VALUE klass)
ec_ary_alloc_embed(rb_execution_context_t *ec, VALUE klass, long capa)
{
RB_EC_NEWOBJ_OF(ec, ary, struct RArray, klass, T_ARRAY | RARRAY_EMBED_FLAG | (RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0));
size_t size = ary_embed_size(capa);
assert(rb_gc_size_allocatable_p(size));
#if !USE_RVARGC
assert(size <= sizeof(struct RArray));
#endif
RB_RVARGC_EC_NEWOBJ_OF(ec, ary, struct RArray, klass,
T_ARRAY | RARRAY_EMBED_FLAG | (RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0),
size);
/* Created array is:
* FL_SET_EMBED((VALUE)ary);
* ARY_SET_EMBED_LEN((VALUE)ary, 0);
@ -804,6 +859,15 @@ ec_ary_alloc(rb_execution_context_t *ec, VALUE klass)
return (VALUE)ary;
}
/*
 * Same allocation as a non-embedded T_ARRAY of class `klass` with a
 * requested size of sizeof(struct RArray), but performed through the
 * execution-context-aware allocator macro using `ec`.
 *
 * RARRAY_EMBED_FLAG is not among the flags passed to the allocator.
 * FL_WB_PROTECTED is added only when RGENGC_WB_PROTECTED_ARRAY is enabled.
 */
static VALUE
ec_ary_alloc_heap(rb_execution_context_t *ec, VALUE klass)
{
    const VALUE wb_flag = RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0;

    RB_RVARGC_EC_NEWOBJ_OF(ec, obj, struct RArray, klass, T_ARRAY | wb_flag, sizeof(struct RArray));

    return (VALUE)obj;
}
static VALUE
ec_ary_new(rb_execution_context_t *ec, VALUE klass, long capa)
{
@ -818,11 +882,14 @@ ec_ary_new(rb_execution_context_t *ec, VALUE klass, long capa)
RUBY_DTRACE_CREATE_HOOK(ARRAY, capa);
ary = ec_ary_alloc(ec, klass);
if (ary_embeddable_p(capa)) {
ary = ec_ary_alloc_embed(ec, klass, capa);
}
else {
ary = ec_ary_alloc_heap(ec, klass);
assert(!ARY_EMBED_P(ary));
if (capa > RARRAY_EMBED_LEN_MAX) {
ptr = ary_heap_alloc(ary, capa);
FL_UNSET_EMBED(ary);
ARY_SET_PTR(ary, ptr);
ARY_SET_CAPA(ary, capa);
ARY_SET_HEAP_LEN(ary, 0);
@ -934,7 +1001,7 @@ ary_make_shared(VALUE ary)
else {
long capa = ARY_CAPA(ary), len = RARRAY_LEN(ary);
const VALUE *ptr;
NEWOBJ_OF(shared, struct RArray, 0, T_ARRAY | (RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0));
VALUE shared = ary_alloc_heap(0);
VALUE vshared = (VALUE)shared;
rb_ary_transient_heap_evacuate(ary, TRUE);
@ -963,8 +1030,10 @@ ary_make_substitution(VALUE ary)
{
long len = RARRAY_LEN(ary);
if (len <= RARRAY_EMBED_LEN_MAX) {
VALUE subst = rb_ary_new2(len);
if (ary_embeddable_p(len)) {
VALUE subst = rb_ary_new_capa(len);
assert(ARY_EMBED_P(subst));
ary_memcpy(subst, 0, len, RARRAY_CONST_PTR_TRANSIENT(ary));
ARY_SET_EMBED_LEN(subst, len);
return subst;
@ -1025,6 +1094,30 @@ rb_ary_s_try_convert(VALUE dummy, VALUE ary)
return rb_check_array_type(ary);
}
/* :nodoc: */
static VALUE
rb_ary_s_new(int argc, VALUE *argv, VALUE klass)
{
    long capa = 0;
    VALUE ary;

    /* Any class other than Array itself takes the generic instantiation path. */
    if (klass != rb_cArray) {
        return rb_class_new_instance_pass_kw(argc, argv, klass);
    }

    /*
     * When the first argument is a Fixnum, use it as the capacity of the
     * array allocated here; negative values are treated as 0.  Any other
     * kind of first argument (or none) leaves the capacity at 0.
     */
    if (argc > 0 && FIXNUM_P(argv[0])) {
        const long requested = FIX2LONG(argv[0]);
        capa = (requested < 0) ? 0 : requested;
    }

    ary = ary_new(klass, capa);

    /* Run #initialize with the original arguments and keyword status. */
    rb_obj_call_init_kw(ary, argc, argv, RB_PASS_CALLED_KEYWORDS);

    return ary;
}
/*
* call-seq:
* Array.new -> new_empty_array
@ -1180,15 +1273,15 @@ ary_make_partial(VALUE ary, VALUE klass, long offset, long len)
assert(len >= 0);
assert(offset+len <= RARRAY_LEN(ary));
if (len <= RARRAY_EMBED_LEN_MAX) {
VALUE result = ary_alloc(klass);
if (ary_embeddable_p(len)) {
VALUE result = ary_alloc_embed(klass, len);
ary_memcpy(result, 0, len, RARRAY_CONST_PTR_TRANSIENT(ary) + offset);
ARY_SET_EMBED_LEN(result, len);
return result;
}
else {
VALUE shared, result = ary_alloc(klass);
FL_UNSET_EMBED(result);
VALUE shared, result = ary_alloc_heap(klass);
assert(!ARY_EMBED_P(result));
shared = ary_make_shared(ary);
ARY_SET_PTR(result, RARRAY_CONST_PTR_TRANSIENT(ary));
@ -1228,8 +1321,9 @@ ary_make_partial_step(VALUE ary, VALUE klass, long offset, long len, long step)
long i;
long j = offset + ((step > 0) ? 0 : (orig_len - 1));
VALUE result = ary_new(klass, len);
if (len <= RARRAY_EMBED_LEN_MAX) {
if (ARY_EMBED_P(result)) {
VALUE *ptr = (VALUE *)ARY_EMBED_PTR(result);
for (i = 0; i < len; ++i) {
RB_OBJ_WRITE(result, ptr+i, values[j]);
@ -1490,8 +1584,8 @@ rb_ary_behead(VALUE ary, long n)
rb_ary_modify_check(ary);
if (RB_UNLIKELY(!ARY_SHARED_P(ary))) {
if (RARRAY_LEN(ary) < ARY_DEFAULT_SIZE) {
if (!ARY_SHARED_P(ary)) {
if (ARY_EMBED_P(ary) || RARRAY_LEN(ary) < ARY_DEFAULT_SIZE) {
RARRAY_PTR_USE_TRANSIENT(ary, ptr, {
MEMMOVE(ptr, ptr + n, VALUE, RARRAY_LEN(ary) - n);
}); /* WB: no new reference */
@ -1546,7 +1640,7 @@ ary_modify_for_unshift(VALUE ary, int argc)
}
/* use shared array for big "queues" */
if (new_len > ARY_DEFAULT_SIZE * 4) {
if (new_len > ARY_DEFAULT_SIZE * 4 && !ARY_EMBED_P(ary)) {
ary_verify(ary);
/* make a room for unshifted items */
@ -2223,12 +2317,18 @@ rb_ary_resize(VALUE ary, long len)
else if (ARY_EMBED_P(ary)) {
ARY_SET_EMBED_LEN(ary, len);
}
else if (len <= RARRAY_EMBED_LEN_MAX) {
VALUE tmp[RARRAY_EMBED_LEN_MAX];
MEMCPY(tmp, ARY_HEAP_PTR(ary), VALUE, len);
ary_discard(ary);
MEMCPY((VALUE *)ARY_EMBED_PTR(ary), tmp, VALUE, len); /* WB: no new reference */
else if (len <= ary_embed_capa(ary)) {
const VALUE *ptr = ARY_HEAP_PTR(ary);
long ptr_capa = ARY_HEAP_SIZE(ary);
bool is_malloc_ptr = !ARY_SHARED_P(ary) && !RARRAY_TRANSIENT_P(ary);
FL_UNSET(ary, RARRAY_TRANSIENT_FLAG);
FL_SET_EMBED(ary);
MEMCPY((VALUE *)ARY_EMBED_PTR(ary), ptr, VALUE, len); /* WB: no new reference */
ARY_SET_EMBED_LEN(ary, len);
if (is_malloc_ptr) ruby_sized_xfree((void *)ptr, ptr_capa);
}
else {
if (olen > len + ARY_DEFAULT_SIZE) {
@ -4392,11 +4492,29 @@ rb_ary_replace(VALUE copy, VALUE orig)
rb_ary_reset(copy);
if (RARRAY_LEN(orig) <= RARRAY_EMBED_LEN_MAX) {
/* orig has enough space to embed the contents of orig. */
if (RARRAY_LEN(orig) <= ary_embed_capa(copy)) {
assert(ARY_EMBED_P(copy));
ary_memcpy(copy, 0, RARRAY_LEN(orig), RARRAY_CONST_PTR_TRANSIENT(orig));
ARY_SET_LEN(copy, RARRAY_LEN(orig));
ARY_SET_EMBED_LEN(copy, RARRAY_LEN(orig));
}
#if USE_RVARGC
/* orig is embedded but copy does not have enough space to embed the
* contents of orig. */
else if (ARY_EMBED_P(orig)) {
long len = ARY_EMBED_LEN(orig);
VALUE *ptr = ary_heap_alloc(copy, len);
MEMCPY(ptr, ARY_EMBED_PTR(orig), VALUE, len);
FL_UNSET_EMBED(copy);
ARY_SET_PTR(copy, ptr);
ARY_SET_LEN(copy, len);
ARY_SET_CAPA(copy, len);
}
#endif
/* Otherwise, orig is on heap and copy does not have enough space to embed
* the contents of orig. */
else {
VALUE shared_root = ary_make_shared(orig);
FL_UNSET_EMBED(copy);
@ -8222,6 +8340,7 @@ Init_Array(void)
rb_include_module(rb_cArray, rb_mEnumerable);
rb_define_alloc_func(rb_cArray, empty_ary_alloc);
rb_define_singleton_method(rb_cArray, "new", rb_ary_s_new, -1);
rb_define_singleton_method(rb_cArray, "[]", rb_ary_s_create, -1);
rb_define_singleton_method(rb_cArray, "try_convert", rb_ary_s_try_convert, 1);
rb_define_method(rb_cArray, "initialize", rb_ary_initialize, -1);

View File

@ -22,7 +22,7 @@
* In released versions of Ruby, this number should not be changed since teeny
* versions of Ruby should guarantee ABI compatibility.
*/
#define RUBY_ABI_VERSION 0
#define RUBY_ABI_VERSION 1
/* Windows does not support weak symbols so ruby_abi_version will not exist
* in the shared library. */

View File

@ -130,7 +130,13 @@ enum ruby_rarray_flags {
* 3rd parties must not be aware that there even is more than one way to
* store array elements. It was a bad idea to expose this to them.
*/
#if USE_RVARGC
RARRAY_EMBED_LEN_MASK = RUBY_FL_USER8 | RUBY_FL_USER7 | RUBY_FL_USER6 |
RUBY_FL_USER5 | RUBY_FL_USER4 | RUBY_FL_USER3
#else
RARRAY_EMBED_LEN_MASK = RUBY_FL_USER4 | RUBY_FL_USER3
#endif
#if USE_TRANSIENT_HEAP
,
@ -156,10 +162,14 @@ enum ruby_rarray_flags {
*/
enum ruby_rarray_consts {
/** Where ::RARRAY_EMBED_LEN_MASK resides. */
RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3,
RARRAY_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 3
#if !USE_RVARGC
,
/** Max possible number elements that can be embedded. */
RARRAY_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(VALUE)
#endif
};
/** Ruby's array. */
@ -218,7 +228,16 @@ struct RArray {
* to store its elements. In this case the length is encoded into the
* flags.
*/
#if USE_RVARGC
/* This is a length 1 array because:
* 1. GCC has a bug that does not optimize C flexible array members
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102452)
* 2. Zero length arrays are not supported by all compilers
*/
const VALUE ary[1];
#else
const VALUE ary[RARRAY_EMBED_LEN_MAX];
#endif
} as;
};