From bfb395c620b811b4b3cb7d535d58721268af285d Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 24 Jul 2023 14:21:50 -0400 Subject: [PATCH] Implement weak references in the GC [Feature #19783] This commit adds support for weak references in the GC through the function `rb_gc_mark_weak`. Unlike strong references, weak references do not mark the object, but rather let the GC know that an object refers to another one. If the child object is freed, the pointer from the parent object is overwritten with `Qundef`. Co-Authored-By: Jean Boussier --- common.mk | 2 ++ darray.h | 63 ++++++++++++++++++++++++++++++++++++++------------- gc.c | 48 +++++++++++++++++++++++++++++++++++++++ internal/gc.h | 2 ++ 4 files changed, 99 insertions(+), 16 deletions(-) diff --git a/common.mk b/common.mk index 92e2eddb4d..b0f9369fbc 100644 --- a/common.mk +++ b/common.mk @@ -6980,6 +6980,7 @@ gc.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h gc.$(OBJEXT): {$(VPATH)}builtin.h gc.$(OBJEXT): {$(VPATH)}config.h gc.$(OBJEXT): {$(VPATH)}constant.h +gc.$(OBJEXT): {$(VPATH)}darray.h gc.$(OBJEXT): {$(VPATH)}debug.h gc.$(OBJEXT): {$(VPATH)}debug_counter.h gc.$(OBJEXT): {$(VPATH)}defines.h @@ -8401,6 +8402,7 @@ load.$(OBJEXT): $(CCAN_DIR)/str/str.h load.$(OBJEXT): $(hdrdir)/ruby/ruby.h load.$(OBJEXT): $(top_srcdir)/internal/array.h load.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h +load.$(OBJEXT): $(top_srcdir)/internal/bits.h load.$(OBJEXT): $(top_srcdir)/internal/compilers.h load.$(OBJEXT): $(top_srcdir)/internal/dir.h load.$(OBJEXT): $(top_srcdir)/internal/error.h diff --git a/darray.h b/darray.h index b7ab0ab154..8e1e576355 100644 --- a/darray.h +++ b/darray.h @@ -5,6 +5,7 @@ #include #include +#include "internal/bits.h" #include "internal/gc.h" // Type for a dynamic array. Use to declare a dynamic array. 
@@ -85,6 +86,15 @@ rb_darray_make_impl((ptr_to_ary), size, sizeof(**(ptr_to_ary)), \ sizeof((*(ptr_to_ary))->data[0]), rb_darray_calloc_mul_add_without_gc) +/* Resize the darray to a new capacity. The new capacity must be greater than + * or equal to the size of the darray. capa is rounded up to a power of two. + * + * void rb_darray_resize_capa_without_gc(rb_darray(T) *ptr_to_ary, size_t capa); + */ +#define rb_darray_resize_capa_without_gc(ptr_to_ary, capa) \ + rb_darray_resize_capa_impl((ptr_to_ary), rb_darray_next_power_of_two(capa), sizeof(**(ptr_to_ary)), \ + sizeof((*(ptr_to_ary))->data[0]), rb_darray_realloc_mul_add_without_gc) + #define rb_darray_data_ptr(ary) ((ary)->data) typedef struct rb_darray_meta { @@ -92,9 +102,8 @@ typedef struct rb_darray_meta { size_t capa; } rb_darray_meta_t; -// Set the size of the array to zero without freeing the backing memory. -// Allows reusing the same array. -// +/* Set the size of the array to zero without freeing the backing memory. + * Allows reusing the same array. */ static inline void rb_darray_clear(void *ary) { @@ -162,6 +171,40 @@ rb_darray_realloc_mul_add_without_gc(const void *orig_ptr, size_t x, size_t y, s return ptr; } +/* Internal function. Returns the next power of two that is greater than or + * equal to n. */ +static inline size_t +rb_darray_next_power_of_two(size_t n) +{ + return (size_t)1 << (64 - nlz_int64(n)); /* shift a size_t: the int literal 1 overflows for shift counts >= 31 (32-bit int) */ +} + +/* Internal function. Resizes the capacity of a darray. The new capacity must + * be greater than or equal to the size of the darray. */ +static inline void +rb_darray_resize_capa_impl(void *ptr_to_ary, size_t new_capa, size_t header_size, size_t element_size, + void *(*realloc_mul_add_impl)(const void *, size_t, size_t, size_t)) +{ + rb_darray_meta_t **ptr_to_ptr_to_meta = ptr_to_ary; + rb_darray_meta_t *meta = *ptr_to_ptr_to_meta; + + rb_darray_meta_t *new_ary = realloc_mul_add_impl(meta, new_capa, element_size, header_size); + + if (meta == NULL) { + /* First allocation. Initialize size. 
On subsequent allocations + * realloc takes care of carrying over the size. */ + new_ary->size = 0; + } + + assert(new_ary->size <= new_capa); + + new_ary->capa = new_capa; + + // We don't have access to the type of the dynamic array in function context. + // Write out result with memcpy to avoid strict aliasing issue. + memcpy(ptr_to_ary, &new_ary, sizeof(new_ary)); +} + // Internal function // Ensure there is space for one more element. // Note: header_size can be bigger than sizeof(rb_darray_meta_t) when T is __int128_t, for example. @@ -177,19 +220,7 @@ rb_darray_ensure_space(void *ptr_to_ary, size_t header_size, size_t element_size // Double the capacity size_t new_capa = current_capa == 0 ? 1 : current_capa * 2; - rb_darray_meta_t *doubled_ary = realloc_mul_add_impl(meta, new_capa, element_size, header_size); - - if (meta == NULL) { - // First allocation. Initialize size. On subsequence allocations - // realloc takes care of carrying over the size. - doubled_ary->size = 0; - } - - doubled_ary->capa = new_capa; - - // We don't have access to the type of the dynamic array in function context. - // Write out result with memcpy to avoid strict aliasing issue. - memcpy(ptr_to_ary, &doubled_ary, sizeof(doubled_ary)); + rb_darray_resize_capa_impl(ptr_to_ary, new_capa, header_size, element_size, realloc_mul_add_impl); } static inline void diff --git a/gc.c b/gc.c index d8063a5cbd..38fe1a350e 100644 --- a/gc.c +++ b/gc.c @@ -95,6 +95,7 @@ #undef LIST_HEAD /* ccan/list conflicts with BSD-origin sys/queue.h. 
*/ #include "constant.h" +#include "darray.h" #include "debug_counter.h" #include "eval_intern.h" #include "id_table.h" @@ -869,6 +870,8 @@ typedef struct rb_objspace { #if GC_DEBUG_STRESS_TO_CLASS VALUE stress_to_class; #endif + + rb_darray(VALUE *) weak_references; /* slot addresses registered via rb_gc_mark_weak */ } rb_objspace_t; @@ -1831,6 +1834,8 @@ rb_objspace_alloc(void) ccan_list_head_init(&SIZE_POOL_TOMB_HEAP(size_pool)->pages); } + rb_darray_make_without_gc(&objspace->weak_references, 0); + dont_gc_on(); return objspace; @@ -1879,6 +1884,8 @@ rb_objspace_free(rb_objspace_t *objspace) free_stack_chunks(&objspace->mark_stack); mark_stack_free_cache(&objspace->mark_stack); + rb_darray_free_without_gc(objspace->weak_references); + free(objspace); } @@ -6878,6 +6885,23 @@ rb_gc_mark_and_move(VALUE *ptr) } } +void +rb_gc_mark_weak(VALUE *ptr) +{ + rb_objspace_t *objspace = &rb_objspace; + + if (UNLIKELY(!during_gc)) return; /* nothing is recorded outside of GC */ + + VALUE obj = *ptr; + if (RB_SPECIAL_CONST_P(obj)) return; /* special constants are not tracked */ + + GC_ASSERT(objspace->rgengc.parent_object == 0 || FL_TEST(objspace->rgengc.parent_object, FL_WB_PROTECTED)); + + rgengc_check_relation(objspace, obj); + + rb_darray_append_without_gc(&objspace->weak_references, ptr); /* remember the slot address for gc_update_weak_references */ +} + /* CAUTION: THIS FUNCTION ENABLE *ONLY BEFORE* SWEEPING. * This function is only for GC_END_MARK timing. 
*/ @@ -8099,6 +8123,28 @@ gc_marks_wb_unprotected_objects(rb_objspace_t *objspace, rb_heap_t *heap) gc_mark_stacked_objects_all(objspace); } +static void +gc_update_weak_references(rb_objspace_t *objspace) +{ + size_t retained_weak_references_count = 0; + VALUE **ptr_ptr; + rb_darray_foreach(objspace->weak_references, i, ptr_ptr) { + VALUE obj = **ptr_ptr; + + if (RB_SPECIAL_CONST_P(obj)) continue; /* slot currently holds a special constant: skip it */ + + if (!RVALUE_MARKED(obj)) { + **ptr_ptr = Qundef; /* referent was not marked: overwrite the weak slot */ + } + else { + retained_weak_references_count++; + } + } + + rb_darray_clear(objspace->weak_references); /* size becomes 0, backing memory is kept */ + rb_darray_resize_capa_without_gc(&objspace->weak_references, retained_weak_references_count); /* capacity is sized from the number of surviving references */ +} + static void gc_marks_finish(rb_objspace_t *objspace) { @@ -8125,6 +8171,8 @@ gc_marks_finish(rb_objspace_t *objspace) } } + gc_update_weak_references(objspace); /* clear weak slots whose referents were not marked */ + #if RGENGC_CHECK_MODE >= 2 gc_verify_internal_consistency(objspace); #endif diff --git a/internal/gc.h b/internal/gc.h index 2934422474..7e0942a76a 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -235,6 +235,8 @@ VALUE rb_define_finalizer_no_check(VALUE obj, VALUE block); void rb_gc_mark_and_move(VALUE *ptr); +void rb_gc_mark_weak(VALUE *ptr); /* records ptr as a weak reference slot; returns early outside of GC */ + #define rb_gc_mark_and_move_ptr(ptr) do { \ VALUE _obj = (VALUE)*(ptr); \ rb_gc_mark_and_move(&_obj); \