Speed up Ractors for Variable Width Allocation

This commit adds a Ractor cache for every size pool. Previously, all
VWA-allocated objects went through the slow path and locked the VM.
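
In outline: each Ractor now owns one freelist per size pool, and the
allocation fast path pops a slot from that per-pool freelist without
taking the VM lock. A simplified sketch of the fast path (the real
function, in the gc.c diff below, additionally handles ASAN poisoning):

    static inline VALUE
    ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
    {
        /* The cache is private to this Ractor, so popping a slot off
         * its freelist needs no synchronization. */
        rb_ractor_newobj_size_pool_cache_t *cache = &cr->newobj_cache.size_pool_caches[size_pool_idx];
        RVALUE *p = cache->freelist;
        if (p) {
            VALUE obj = (VALUE)p;
            cache->freelist = p->as.free.next;  /* unlink the head slot */
            return obj;
        }
        return Qfalse;  /* cache empty: caller takes the VM lock and refills */
    }

Only when a per-pool freelist runs dry does the slow path take the VM
lock and refill the cache from the size pool's eden heap
(ractor_cache_slots -> heap_next_freepage -> ractor_set_cache).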

On a micro-benchmark that measures String allocation (times are user,
system, total, and real seconds, as printed by Ruby's Benchmark module):

VWA turned off:
  29.196591   0.889709  30.086300 (  9.434059)

VWA before this commit:
  29.279486  41.477869  70.757355 ( 12.527379)

VWA after this commit:
  16.782903   0.557117  17.340020 (  4.255603)
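
Allocation requests are routed to a pool by index before the cache
lookup. A standalone sketch of that mapping (size_pool_idx_for_size in
the gc.c diff below), assuming a 40-byte RVALUE slot and substituting
GCC/Clang's __builtin_clzll for CRuby's nlz_int64:

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    #define SLOT_SIZE 40  /* assumed sizeof(RVALUE) on 64-bit builds */
    #define SIZE_POOL_COUNT 4
    #define CEILDIV(i, mod) (((i) + (mod) - 1) / (mod))

    /* Pool index is ceil(log2(slot_count)): pools hold 1, 2, 4, 8 slots. */
    static size_t
    size_pool_idx_for_size(size_t size)
    {
        size_t slot_count = CEILDIV(size, SLOT_SIZE);
        size_t idx = slot_count <= 1
            ? 0 : (size_t)(64 - __builtin_clzll((unsigned long long)slot_count - 1));
        assert(idx < SIZE_POOL_COUNT);
        return idx;
    }

    int
    main(void)
    {
        printf("%zu\n", size_pool_idx_for_size(40));   /* 0: one 40-byte slot */
        printf("%zu\n", size_pool_idx_for_size(100));  /* 2: 3 slots, rounded up to 4 */
        printf("%zu\n", size_pool_idx_for_size(320));  /* 3: 8 slots */
        return 0;
    }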

Author: Peter Zhu
Date:   2021-11-19 14:51:58 -05:00
Parent: c14f230b26
Commit: 9aded89f40
Notes (git): 2021-11-24 00:51:53 +09:00

4 changed files with 82 additions and 100 deletions

class.c

@@ -203,7 +203,9 @@ class_alloc(VALUE flags, VALUE klass)
     RVARGC_NEWOBJ_OF(obj, struct RClass, klass, (flags & T_MASK) | FL_PROMOTED1 /* start from age == 2 */ | (RGENGC_WB_PROTECTED_CLASS ? FL_WB_PROTECTED : 0), alloc_size);
-#if !USE_RVARGC
+#if USE_RVARGC
+    memset(RCLASS_EXT(obj), 0, sizeof(rb_classext_t));
+#else
     obj->ptr = ZALLOC(rb_classext_t);
 #endif

gc.c (146 lines changed)

@@ -659,11 +659,6 @@ typedef struct mark_stack {
     size_t unused_cache_size;
 } mark_stack_t;
 
-#if USE_RVARGC
-#define SIZE_POOL_COUNT 4
-#else
-#define SIZE_POOL_COUNT 1
-#endif
-
 #define SIZE_POOL_EDEN_HEAP(size_pool) (&(size_pool)->eden_heap)
 #define SIZE_POOL_TOMB_HEAP(size_pool) (&(size_pool)->tomb_heap)
@@ -681,11 +676,6 @@ typedef struct rb_heap_struct {
 } rb_heap_t;
 
 typedef struct rb_size_pool_struct {
-#if USE_RVARGC
-    RVALUE *freelist;
-    struct heap_page *using_page;
-#endif
-
     short slot_size;
 
     size_t allocatable_pages;
@@ -2325,7 +2315,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace,
 static inline void heap_add_freepage(rb_heap_t *heap, struct heap_page *page);
 static struct heap_page *heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap);
-static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page);
+static inline void ractor_set_cache(rb_ractor_t *cr, struct heap_page *page, size_t size_pool_idx);
 
 size_t
 rb_gc_obj_slot_size(VALUE obj)
@@ -2355,17 +2345,14 @@ rb_gc_size_allocatable_p(size_t size)
 }
 
 static inline VALUE
-ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size)
+ractor_cached_free_region(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
 {
-    if (size > sizeof(RVALUE)) {
-        return Qfalse;
-    }
-
-    RVALUE *p = cr->newobj_cache.freelist;
+    rb_ractor_newobj_size_pool_cache_t *cache = &cr->newobj_cache.size_pool_caches[size_pool_idx];
+    RVALUE *p = cache->freelist;
 
     if (p) {
         VALUE obj = (VALUE)p;
-        cr->newobj_cache.freelist = p->as.free.next;
+        cache->freelist = p->as.free.next;
         asan_unpoison_object(obj, true);
         return obj;
     }
@@ -2396,28 +2383,31 @@ heap_next_freepage(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t
 }
 
 static inline void
-ractor_set_cache(rb_ractor_t *cr, struct heap_page *page)
+ractor_set_cache(rb_ractor_t *cr, struct heap_page *page, size_t size_pool_idx)
 {
     gc_report(3, &rb_objspace, "ractor_set_cache: Using page %p\n", (void *)GET_PAGE_BODY(page->start));
 
-    cr->newobj_cache.using_page = page;
-    cr->newobj_cache.freelist = page->freelist;
+    rb_ractor_newobj_size_pool_cache_t *cache = &cr->newobj_cache.size_pool_caches[size_pool_idx];
+
+    cache->using_page = page;
+    cache->freelist = page->freelist;
     page->free_slots = 0;
     page->freelist = NULL;
 
-    asan_unpoison_object((VALUE)cr->newobj_cache.freelist, false);
-    GC_ASSERT(RB_TYPE_P((VALUE)cr->newobj_cache.freelist, T_NONE));
-    asan_poison_object((VALUE)cr->newobj_cache.freelist);
+    asan_unpoison_object((VALUE)cache->freelist, false);
+    GC_ASSERT(RB_TYPE_P((VALUE)cache->freelist, T_NONE));
+    asan_poison_object((VALUE)cache->freelist);
 }
 
 static inline void
-ractor_cache_slots(rb_objspace_t *objspace, rb_ractor_t *cr)
+ractor_cache_slots(rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
 {
     ASSERT_vm_locking();
 
-    rb_size_pool_t *size_pool = &size_pools[0];
+    rb_size_pool_t *size_pool = &size_pools[size_pool_idx];
     struct heap_page *page = heap_next_freepage(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool));
 
-    ractor_set_cache(cr, page);
+    ractor_set_cache(cr, page, size_pool_idx);
 }
 
 static inline VALUE
@@ -2430,10 +2420,10 @@ newobj_fill(VALUE obj, VALUE v1, VALUE v2, VALUE v3)
     return obj;
 }
 
-#if USE_RVARGC
-static inline rb_size_pool_t *
-size_pool_for_size(rb_objspace_t *objspace, size_t size)
+static inline size_t
+size_pool_idx_for_size(size_t size)
 {
+#if USE_RVARGC
     size_t slot_count = CEILDIV(size, sizeof(RVALUE));
 
     /* size_pool_idx is ceil(log2(slot_count)) */
@@ -2442,41 +2432,31 @@ size_pool_for_size(rb_objspace_t *objspace, size_t size)
         rb_bug("size_pool_for_size: allocation size too large");
     }
 
+    return size_pool_idx;
+#else
+    GC_ASSERT(size <= sizeof(RVALUE));
+    return 0;
+#endif
+}
+
+#if USE_RVARGC
+static inline rb_size_pool_t *
+size_pool_for_size(rb_objspace_t *objspace, size_t size)
+{
+    size_t size_pool_idx = size_pool_idx_for_size(size);
+
     rb_size_pool_t *size_pool = &size_pools[size_pool_idx];
     GC_ASSERT(size_pool->slot_size >= (short)size);
     GC_ASSERT(size_pool_idx == 0 || size_pools[size_pool_idx - 1].slot_size < (short)size);
 
     return size_pool;
 }
-
-static inline VALUE
-heap_get_freeobj(rb_objspace_t *objspace, rb_size_pool_t *size_pool, rb_heap_t *heap)
-{
-    RVALUE *p = size_pool->freelist;
-
-    if (UNLIKELY(p == NULL)) {
-        struct heap_page *page = heap_next_freepage(objspace, size_pool, heap);
-        size_pool->using_page = page;
-
-        asan_unpoison_memory_region(&page->freelist, sizeof(RVALUE*), false);
-        p = page->freelist;
-        page->freelist = NULL;
-        asan_poison_memory_region(&page->freelist, sizeof(RVALUE*));
-        page->free_slots = 0;
-    }
-
-    asan_unpoison_object((VALUE)p, true);
-    size_pool->freelist = p->as.free.next;
-
-    return (VALUE)p;
-}
 #endif
 
-ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size));
+ALWAYS_INLINE(static VALUE newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t size_pool_idx));
 
 static inline VALUE
-newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t alloc_size)
+newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, int wb_protected, size_t size_pool_idx)
 {
     VALUE obj;
     unsigned int lev;
@@ -2497,22 +2477,9 @@ newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *
         }
     }
 
-    if (alloc_size <= sizeof(RVALUE)) {
-        // allocate new slot
-        while ((obj = ractor_cached_free_region(objspace, cr, alloc_size)) == Qfalse) {
-            ractor_cache_slots(objspace, cr);
-        }
-    }
-    else {
-#if USE_RVARGC
-        rb_size_pool_t *size_pool = size_pool_for_size(objspace, alloc_size);
-        obj = heap_get_freeobj(objspace, size_pool, SIZE_POOL_EDEN_HEAP(size_pool));
-        memset((void *)obj, 0, size_pool->slot_size);
-#else
-        rb_bug("unreachable when not using rvargc");
-#endif
+    // allocate new slot
+    while ((obj = ractor_cached_free_region(objspace, cr, size_pool_idx)) == Qfalse) {
+        ractor_cache_slots(objspace, cr, size_pool_idx);
     }
 
     GC_ASSERT(obj != 0);
     newobj_init(klass, flags, wb_protected, objspace, obj);
@@ -2525,20 +2492,20 @@ newobj_slowpath(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *
 }
 
 NOINLINE(static VALUE newobj_slowpath_wb_protected(VALUE klass, VALUE flags,
-                                                   rb_objspace_t *objspace, rb_ractor_t *cr, size_t alloc_size));
+                                                   rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx));
 NOINLINE(static VALUE newobj_slowpath_wb_unprotected(VALUE klass, VALUE flags,
-                                                     rb_objspace_t *objspace, rb_ractor_t *cr, size_t alloc_size));
+                                                     rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx));
 
 static VALUE
-newobj_slowpath_wb_protected(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, size_t alloc_size)
+newobj_slowpath_wb_protected(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
 {
-    return newobj_slowpath(klass, flags, objspace, cr, TRUE, alloc_size);
+    return newobj_slowpath(klass, flags, objspace, cr, TRUE, size_pool_idx);
 }
 
 static VALUE
-newobj_slowpath_wb_unprotected(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, size_t alloc_size)
+newobj_slowpath_wb_unprotected(VALUE klass, VALUE flags, rb_objspace_t *objspace, rb_ractor_t *cr, size_t size_pool_idx)
 {
-    return newobj_slowpath(klass, flags, objspace, cr, FALSE, alloc_size);
+    return newobj_slowpath(klass, flags, objspace, cr, FALSE, size_pool_idx);
 }
 
 static inline VALUE
@@ -2559,11 +2526,13 @@ newobj_of0(VALUE klass, VALUE flags, int wb_protected, rb_ractor_t *cr, size_t a
     }
 #endif
 
+    size_t size_pool_idx = size_pool_idx_for_size(alloc_size);
+
     if ((!UNLIKELY(during_gc ||
                    ruby_gc_stressful ||
                    gc_event_hook_available_p(objspace)) &&
         wb_protected &&
-        (obj = ractor_cached_free_region(objspace, cr, alloc_size)) != Qfalse)) {
+        (obj = ractor_cached_free_region(objspace, cr, size_pool_idx)) != Qfalse)) {
         newobj_init(klass, flags, wb_protected, objspace, obj);
     }
@@ -2571,8 +2540,8 @@ newobj_of0(VALUE klass, VALUE flags, int wb_protected, rb_ractor_t *cr, size_t a
         RB_DEBUG_COUNTER_INC(obj_newobj_slowpath);
 
         obj = wb_protected ?
-          newobj_slowpath_wb_protected(klass, flags, objspace, cr, alloc_size) :
-          newobj_slowpath_wb_unprotected(klass, flags, objspace, cr, alloc_size);
+          newobj_slowpath_wb_protected(klass, flags, objspace, cr, size_pool_idx) :
+          newobj_slowpath_wb_unprotected(klass, flags, objspace, cr, size_pool_idx);
     }
 
     return obj;
@@ -5585,13 +5554,6 @@ gc_sweep_start(rb_objspace_t *objspace)
     for (int i = 0; i < SIZE_POOL_COUNT; i++) {
         rb_size_pool_t *size_pool = &size_pools[i];
 
-#if USE_RVARGC
-        heap_page_freelist_append(size_pool->using_page, size_pool->freelist);
-
-        size_pool->using_page = NULL;
-        size_pool->freelist = NULL;
-#endif
-
         gc_sweep_start_heap(objspace, SIZE_POOL_EDEN_HEAP(size_pool));
     }
@@ -8698,14 +8660,18 @@ rb_obj_gc_flags(VALUE obj, ID* flags, size_t max)
 void
 rb_gc_ractor_newobj_cache_clear(rb_ractor_newobj_cache_t *newobj_cache)
 {
-    struct heap_page *page = newobj_cache->using_page;
-    RVALUE *freelist = newobj_cache->freelist;
-    RUBY_DEBUG_LOG("ractor using_page:%p freelist:%p", (void *)page, (void *)freelist);
-
-    heap_page_freelist_append(page, freelist);
-
-    newobj_cache->using_page = NULL;
-    newobj_cache->freelist = NULL;
+    for (size_t size_pool_idx = 0; size_pool_idx < SIZE_POOL_COUNT; size_pool_idx++) {
+        rb_ractor_newobj_size_pool_cache_t *cache = &newobj_cache->size_pool_caches[size_pool_idx];
+
+        struct heap_page *page = cache->using_page;
+        RVALUE *freelist = cache->freelist;
+        RUBY_DEBUG_LOG("ractor using_page:%p freelist:%p", (void *)page, (void *)freelist);
+
+        heap_page_freelist_append(page, freelist);
+
+        cache->using_page = NULL;
+        cache->freelist = NULL;
+    }
 }
 
 void

internal/gc.h

@@ -67,9 +67,19 @@ struct rb_objspace; /* in vm_core.h */
     rb_obj_write((VALUE)(a), UNALIGNED_MEMBER_ACCESS((VALUE *)(slot)), \
                  (VALUE)(b), __FILE__, __LINE__)
 
-typedef struct ractor_newobj_cache {
+#if USE_RVARGC
+# define SIZE_POOL_COUNT 4
+#else
+# define SIZE_POOL_COUNT 1
+#endif
+
+typedef struct ractor_newobj_size_pool_cache {
     struct RVALUE *freelist;
     struct heap_page *using_page;
+} rb_ractor_newobj_size_pool_cache_t;
+
+typedef struct ractor_newobj_cache {
+    rb_ractor_newobj_size_pool_cache_t size_pool_caches[SIZE_POOL_COUNT];
 } rb_ractor_newobj_cache_t;
 
 /* gc.c */

string.c

@@ -868,7 +868,9 @@ static inline VALUE
 empty_str_alloc(VALUE klass)
 {
     RUBY_DTRACE_CREATE_HOOK(STRING, 0);
-    return str_alloc_embed(klass, 0);
+    VALUE str = str_alloc_embed(klass, 0);
+    memset(RSTRING(str)->as.embed.ary, 0, str_embed_capa(str));
+    return str;
 }
static VALUE static VALUE
@@ -1732,10 +1734,11 @@ str_duplicate_setup(VALUE klass, VALUE str, VALUE dup)
     VALUE flags = FL_TEST_RAW(str, flag_mask);
     int encidx = 0;
     if (STR_EMBED_P(str)) {
-        assert(str_embed_capa(dup) >= RSTRING_EMBED_LEN(str));
-        STR_SET_EMBED_LEN(dup, RSTRING_EMBED_LEN(str));
-        MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary,
-               char, RSTRING_EMBED_LEN(str));
+        long len = RSTRING_EMBED_LEN(str);
+        assert(str_embed_capa(dup) >= len + 1);
+        STR_SET_EMBED_LEN(dup, len);
+        MEMCPY(RSTRING(dup)->as.embed.ary, RSTRING(str)->as.embed.ary, char, len + 1);
         flags &= ~RSTRING_NOEMBED;
     }
     else {
@@ -2321,6 +2324,7 @@ rb_str_times(VALUE str, VALUE times)
     if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
         if (STR_EMBEDDABLE_P(len, 1)) {
             str2 = str_alloc_embed(rb_cString, len + 1);
+            memset(RSTRING_PTR(str2), 0, len + 1);
         }
         else {
             str2 = str_alloc_heap(rb_cString);
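
A note on the class.c and string.c hunks: the old slow path zeroed the
whole slot for VWA objects (the memset removed from newobj_slowpath
above), whereas slots served from the per-Ractor caches are handed out
unzeroed. The added memset of RCLASS_EXT, the memset calls in
empty_str_alloc and rb_str_times, and the len + 1 embed copy in
str_duplicate_setup plausibly exist to restore the invariants that the
class extension starts zeroed and that embedded string buffers stay
NUL-terminated. The recurring pattern, as in empty_str_alloc:

    /* str_alloc_embed now returns a slot that may contain stale bytes,
     * so the embed buffer is cleared explicitly before use. */
    VALUE str = str_alloc_embed(klass, 0);
    memset(RSTRING(str)->as.embed.ary, 0, str_embed_capa(str));
    return str;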