diff --git a/gc.c b/gc.c
index d1fc9085bc..a9167e2ede 100644
--- a/gc.c
+++ b/gc.c
@@ -157,6 +157,68 @@
 #define MAP_ANONYMOUS MAP_ANON
 #endif
 
+
+static size_t malloc_offset = 0;
+#if defined(HAVE_MALLOC_USABLE_SIZE)
+static size_t
+gc_compute_malloc_offset(void)
+{
+    // Different allocators use different metadata storage strategies which result in different
+    // ideal sizes.
+    // For instance malloc(64) will waste 8B with glibc, but waste 0B with jemalloc.
+    // But malloc(56) will waste 0B with glibc, but waste 8B with jemalloc.
+    // So we try allocating 64, 56 and 48 bytes and select the first offset that doesn't
+    // waste memory.
+    // This was tested on Linux with glibc 2.35 and jemalloc 5, and for both it results in
+    // no wasted memory.
+    size_t offset = 0;
+    for (offset = 0; offset <= 16; offset += 8) {
+        size_t allocated = (64 - offset);
+        void *test_ptr = malloc(allocated);
+        size_t wasted = malloc_usable_size(test_ptr) - allocated;
+        free(test_ptr);
+
+        if (wasted == 0) {
+            return offset;
+        }
+    }
+    return 0;
+}
+#else
+static size_t
+gc_compute_malloc_offset(void)
+{
+    // If we don't have malloc_usable_size, we use powers of 2.
+    return 0;
+}
+#endif
+
+size_t
+rb_malloc_grow_capa(size_t current, size_t type_size)
+{
+    size_t current_capacity = current;
+    if (current_capacity < 4) {
+        current_capacity = 4;
+    }
+    current_capacity *= type_size;
+
+    // We double the current capacity.
+    size_t new_capacity = (current_capacity * 2);
+
+    // And round up to the next power of 2 if it's not already one.
+    if (rb_popcount64(new_capacity) != 1) {
+        new_capacity = (size_t)1 << (64 - nlz_int64(new_capacity));
+    }
+
+    new_capacity -= malloc_offset;
+    new_capacity /= type_size;
+    if (current > new_capacity) {
+        rb_bug("rb_malloc_grow_capa: current_capacity=%zu, new_capacity=%zu, malloc_offset=%zu", current, new_capacity, malloc_offset);
+    }
+    RUBY_ASSERT(new_capacity > current);
+    return new_capacity;
+}
+
 static inline struct rbimpl_size_mul_overflow_tag
 size_add_overflow(size_t x, size_t y)
 {
@@ -13979,6 +14041,8 @@ void
 Init_GC(void)
 {
 #undef rb_intern
+    malloc_offset = gc_compute_malloc_offset();
+
     VALUE rb_mObjSpace;
     VALUE rb_mProfiler;
     VALUE gc_constants;
diff --git a/internal/gc.h b/internal/gc.h
index f8f88a41cb..188497b007 100644
--- a/internal/gc.h
+++ b/internal/gc.h
@@ -227,6 +227,7 @@ __attribute__((__alloc_align__(1)))
 RUBY_ATTR_MALLOC void *rb_aligned_malloc(size_t, size_t) RUBY_ATTR_ALLOC_SIZE((2));
 size_t rb_size_mul_or_raise(size_t, size_t, VALUE); /* used in compile.c */
 size_t rb_size_mul_add_or_raise(size_t, size_t, size_t, VALUE); /* used in iseq.h */
+size_t rb_malloc_grow_capa(size_t current_capacity, size_t type_size);
 RUBY_ATTR_MALLOC void *rb_xmalloc_mul_add(size_t, size_t, size_t);
 RUBY_ATTR_MALLOC void *rb_xcalloc_mul_add(size_t, size_t, size_t);
 void *rb_xrealloc_mul_add(const void *, size_t, size_t, size_t);
diff --git a/shape.c b/shape.c
index 89a2c3bd0b..36263ea841 100644
--- a/shape.c
+++ b/shape.c
@@ -418,19 +418,21 @@ rb_shape_get_next(rb_shape_t* shape, VALUE obj, ID id)
 }
 
 static inline rb_shape_t *
-rb_shape_transition_shape_capa_create(rb_shape_t* shape, uint32_t new_capacity)
+rb_shape_transition_shape_capa_create(rb_shape_t* shape, size_t new_capacity)
 {
+    RUBY_ASSERT(new_capacity < (size_t)MAX_IVARS);
+
     ID edge_name = rb_make_temporary_id(new_capacity);
     bool dont_care;
     rb_shape_t * new_shape = get_next_shape_internal(shape, edge_name, SHAPE_CAPACITY_CHANGE, &dont_care, true, false);
-    new_shape->capacity = new_capacity;
+    new_shape->capacity = (uint32_t)new_capacity;
     return new_shape;
 }
 
 rb_shape_t *
 rb_shape_transition_shape_capa(rb_shape_t* shape)
 {
-    return rb_shape_transition_shape_capa_create(shape, shape->capacity * 2);
+    return rb_shape_transition_shape_capa_create(shape, rb_malloc_grow_capa(shape->capacity, sizeof(VALUE)));
 }
 
 bool
@@ -833,7 +835,7 @@ Init_default_shapes(void)
 {
     // Shapes by size pool
     for (int i = 1; i < SIZE_POOL_COUNT; i++) {
-        uint32_t capa = (uint32_t)((rb_size_pool_slot_size(i) - offsetof(struct RObject, as.ary)) / sizeof(VALUE));
+        size_t capa = ((rb_size_pool_slot_size(i) - offsetof(struct RObject, as.ary)) / sizeof(VALUE));
         rb_shape_t * new_shape = rb_shape_transition_shape_capa_create(root, capa);
         new_shape->type = SHAPE_INITIAL_CAPACITY;
         new_shape->size_pool_index = i;