Ractor: Fix moving embedded objects

[Bug #20271]
[Bug #20267]
[Bug #20255]

`rb_obj_alloc(RBASIC_CLASS(obj))` will always allocate from the basic
40B pool, so if `obj` is larger than `40B`, we'll create a corrupted
object when we later copy the shape_id.

Instead we can use the same logic as ractor copy, which is
to use `rb_obj_clone`, and later ask the GC to free the original
object.

We then must turn it into a `T_OBJECT`, because otherwise
just changing its class to `RactorMoved` leaves a lot of
ways to keep using the object, e.g.:

```
a = [1, 2, 3]
Ractor.new{}.send(a, move: true)
[].concat(a) # Should raise, but didn't.
```

If it turns out that `rb_obj_clone` isn't performant enough
for some uses, we can always have carefully crafted specialized
paths for the types that would benefit from it.
This commit is contained in:
Jean Boussier 2025-03-27 14:26:59 +01:00
parent 532b9246d3
commit 0350290262
Notes: git 2025-03-31 10:02:13 +00:00
9 changed files with 218 additions and 104 deletions

View File

@ -1987,3 +1987,127 @@ assert_equal 'ok', %q{
GC.start
:ok.itself
}
# moved objects must not be corrupted if embedded (String)
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = "foobarbazfoobarbazfoobarbazfoobarbaz"
ractor.send(obj.dup, move: true)
roundtripped_obj = ractor.take
roundtripped_obj == obj ? :ok : roundtripped_obj
}
# moved objects must not be corrupted if embedded (Array)
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = Array.new(10, 42)
ractor.send(obj.dup, move: true)
roundtripped_obj = ractor.take
roundtripped_obj == obj ? :ok : roundtripped_obj
}
# moved objects must not be corrupted if embedded (Hash)
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = { foo: 1, bar: 2 }
ractor.send(obj.dup, move: true)
roundtripped_obj = ractor.take
roundtripped_obj == obj ? :ok : roundtripped_obj
}
# moved objects must not be corrupted if embedded (MatchData)
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = "foo".match(/o/)
ractor.send(obj.dup, move: true)
roundtripped_obj = ractor.take
roundtripped_obj == obj ? :ok : roundtripped_obj
}
# moved objects must not be corrupted if embedded (Struct)
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = Struct.new(:a, :b, :c, :d, :e, :f).new(1, 2, 3, 4, 5, 6)
ractor.send(obj.dup, move: true)
roundtripped_obj = ractor.take
roundtripped_obj == obj ? :ok : roundtripped_obj
}
# moved objects must not be corrupted if embedded (Object)
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
class SomeObject
attr_reader :a, :b, :c, :d, :e, :f
def initialize
@a = @b = @c = @d = @e = @f = 1
end
def ==(o)
@a == o.a &&
@b == o.b &&
@c == o.c &&
@d == o.d &&
@e == o.e &&
@f == o.f
end
end
SomeObject.new # initial non-embeded
obj = SomeObject.new
ractor.send(obj.dup, move: true)
roundtripped_obj = ractor.take
roundtripped_obj == obj ? :ok : roundtripped_obj
}
# a moved Array can't be used anymore: passing it to Array#concat must raise TypeError
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = [1]
ractor.send(obj, move: true)
begin
[].concat(obj)
rescue TypeError
:ok
else
:fail
end
}
# a moved String can't be used anymore: passing it to String#replace must raise TypeError
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = "hello"
ractor.send(obj, move: true)
begin
"".replace(obj)
rescue TypeError
:ok
else
:fail
end
}
# a moved Hash can't be used anymore: passing it to Hash#merge must raise TypeError
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = { a: 1 }
ractor.send(obj, move: true)
begin
{}.merge(obj)
rescue TypeError
:ok
else
:fail
end
}
# an object moved to another Ractor and taken back keeps the object_id it had before the move
assert_equal 'ok', %q{
ractor = Ractor.new { Ractor.receive }
obj = Object.new
id = obj.object_id
ractor.send(obj, move: true)
roundtripped_obj = ractor.take
roundtripped_obj.object_id == id ? :ok : :fail
}

View File

@ -13531,6 +13531,7 @@ ractor.$(OBJEXT): $(top_srcdir)/internal/gc.h
ractor.$(OBJEXT): $(top_srcdir)/internal/hash.h
ractor.$(OBJEXT): $(top_srcdir)/internal/imemo.h
ractor.$(OBJEXT): $(top_srcdir)/internal/numeric.h
ractor.$(OBJEXT): $(top_srcdir)/internal/object.h
ractor.$(OBJEXT): $(top_srcdir)/internal/ractor.h
ractor.$(OBJEXT): $(top_srcdir)/internal/rational.h
ractor.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h

17
gc.c
View File

@ -665,6 +665,7 @@ typedef struct gc_function_map {
// Object ID
VALUE (*object_id)(void *objspace_ptr, VALUE obj);
VALUE (*object_id_to_ref)(void *objspace_ptr, VALUE object_id);
void (*object_id_move)(void *objspace_ptr, VALUE dest, VALUE src);
// Forking
void (*before_fork)(void *objspace_ptr);
void (*after_fork)(void *objspace_ptr, rb_pid_t pid);
@ -842,6 +843,7 @@ ruby_modular_gc_init(void)
// Object ID
load_modular_gc_func(object_id);
load_modular_gc_func(object_id_to_ref);
load_modular_gc_func(object_id_move);
// Forking
load_modular_gc_func(before_fork);
load_modular_gc_func(after_fork);
@ -925,6 +927,7 @@ ruby_modular_gc_init(void)
// Object ID
# define rb_gc_impl_object_id rb_gc_functions.object_id
# define rb_gc_impl_object_id_to_ref rb_gc_functions.object_id_to_ref
# define rb_gc_impl_object_id_move rb_gc_functions.object_id_move
// Forking
# define rb_gc_impl_before_fork rb_gc_functions.before_fork
# define rb_gc_impl_after_fork rb_gc_functions.after_fork
@ -966,7 +969,6 @@ rb_objspace_alloc(void)
void *objspace = rb_gc_impl_objspace_alloc();
ruby_current_vm_ptr->gc.objspace = objspace;
rb_gc_impl_objspace_init(objspace);
rb_gc_impl_stress_set(objspace, initial_stress);
@ -2659,6 +2661,19 @@ rb_gc_mark_roots(void *objspace, const char **categoryp)
#define TYPED_DATA_REFS_OFFSET_LIST(d) (size_t *)(uintptr_t)RTYPEDDATA(d)->type->function.dmark
/*
 * Retire `src` after it has been moved to another Ractor as `dest`.
 *
 * Steps, in order:
 *  1. If `src` carries FL_SEEN_OBJ_ID, hand its object id over to `dest`
 *     through the active GC implementation.
 *  2. Release `src` with rb_gc_obj_free().
 *  3. Zero the whole slot of `src` (size taken from rb_gc_obj_slot_size()).
 *  4. Re-tag `src` as a frozen T_OBJECT.
 *
 * Nothing is returned; `src` is modified in place.
 */
void
rb_gc_ractor_moved(VALUE dest, VALUE src)
{
void *objspace = rb_gc_get_objspace();
// Only objects whose object id was already observed have a table entry to transfer.
if (UNLIKELY(FL_TEST_RAW(src, FL_SEEN_OBJ_ID))) {
rb_gc_impl_object_id_move(objspace, dest, src);
}
rb_gc_obj_free(objspace, src);
// Wipe the slot so no field of the old object remains readable through `src`.
MEMZERO((void *)src, char, rb_gc_obj_slot_size(src));
RBASIC(src)->flags = T_OBJECT | FL_FREEZE; // Avoid mutations using bind_call, etc.
}
void
rb_gc_mark_children(void *objspace, VALUE obj)
{

View File

@ -1564,25 +1564,6 @@ rb_gc_impl_garbage_object_p(void *objspace_ptr, VALUE ptr)
!RVALUE_MARKED(objspace, ptr);
}
VALUE
rb_gc_impl_object_id_to_ref(void *objspace_ptr, VALUE object_id)
{
rb_objspace_t *objspace = objspace_ptr;
VALUE obj;
if (st_lookup(objspace->id_to_obj_tbl, object_id, &obj) &&
!rb_gc_impl_garbage_object_p(objspace, obj)) {
return obj;
}
if (rb_funcall(object_id, rb_intern(">="), 1, ULL2NUM(objspace->next_object_id))) {
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is not id value", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
}
else {
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is recycled object", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
}
}
VALUE
rb_gc_impl_object_id(void *objspace_ptr, VALUE obj)
{
@ -1614,6 +1595,46 @@ rb_gc_impl_object_id(void *objspace_ptr, VALUE obj)
return id;
}
/*
 * Resolve `object_id` back to the object registered under it.
 *
 * Returns the object when id_to_obj_tbl has an entry for `object_id` and
 * rb_gc_impl_garbage_object_p() does not report that object as garbage.
 *
 * Otherwise raises RangeError:
 *  - "... is not id value" when `object_id >= next_object_id`;
 *  - "... is recycled object" in the remaining case.
 */
VALUE
rb_gc_impl_object_id_to_ref(void *objspace_ptr, VALUE object_id)
{
rb_objspace_t *objspace = objspace_ptr;
VALUE obj;
if (st_lookup(objspace->id_to_obj_tbl, object_id, &obj) &&
!rb_gc_impl_garbage_object_p(objspace, obj)) {
return obj;
}
// Lookup failed: pick the error message by comparing against next_object_id.
if (rb_funcall(object_id, rb_intern(">="), 1, ULL2NUM(objspace->next_object_id))) {
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is not id value", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
}
else {
rb_raise(rb_eRangeError, "%+"PRIsVALUE" is recycled object", rb_funcall(object_id, rb_intern("to_s"), 1, INT2FIX(10)));
}
}
/*
 * Transfer the object id recorded for `src` to `dest`.
 *
 * Between rb_gc_vm_lock() and rb_gc_vm_unlock():
 *  - the `src` entry is deleted from obj_to_id_tbl (rb_bug() if it is absent);
 *  - FL_SEEN_OBJ_ID is cleared on `src`;
 *  - obj_to_id_tbl gets `dest -> id` and id_to_obj_tbl gets `id -> dest`;
 *  - FL_SEEN_OBJ_ID is set on `dest`.
 */
void
rb_gc_impl_object_id_move(void *objspace_ptr, VALUE dest, VALUE src)
{
/* If the source object's object_id has been seen, we need to update
* the object to object id mapping. */
st_data_t id = 0;
rb_objspace_t *objspace = objspace_ptr;
unsigned int lev = rb_gc_vm_lock();
st_data_t key = (st_data_t)src;
/* st_delete() stores the removed value (the id) into `id`. */
if (!st_delete(objspace->obj_to_id_tbl, &key, &id)) {
rb_bug("gc_move: object ID seen, but not in mapping table: %s", rb_obj_info(src));
}
FL_UNSET_RAW(src, FL_SEEN_OBJ_ID);
st_insert(objspace->obj_to_id_tbl, (st_data_t)dest, id);
st_insert(objspace->id_to_obj_tbl, id, (st_data_t)dest);
FL_SET_RAW(dest, FL_SEEN_OBJ_ID);
rb_gc_vm_unlock(lev);
}
static void free_stack_chunks(mark_stack_t *);
static void mark_stack_free_cache(mark_stack_t *);
static void heap_page_free(rb_objspace_t *objspace, struct heap_page *page);

View File

@ -103,6 +103,7 @@ GC_IMPL_FN void rb_gc_impl_shutdown_call_finalizer(void *objspace_ptr);
// Object ID
GC_IMPL_FN VALUE rb_gc_impl_object_id(void *objspace_ptr, VALUE obj);
GC_IMPL_FN VALUE rb_gc_impl_object_id_to_ref(void *objspace_ptr, VALUE object_id);
GC_IMPL_FN void rb_gc_impl_object_id_move(void *objspace_ptr, VALUE dest, VALUE src);
// Forking
GC_IMPL_FN void rb_gc_impl_before_fork(void *objspace_ptr);
GC_IMPL_FN void rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid);

View File

@ -1108,16 +1108,21 @@ objspace_obj_id_init(struct objspace *objspace)
VALUE
rb_gc_impl_object_id(void *objspace_ptr, VALUE obj)
{
VALUE id;
struct objspace *objspace = objspace_ptr;
unsigned int lev = rb_gc_vm_lock();
VALUE id;
if (st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, &id)) {
RUBY_ASSERT(FL_TEST(obj, FL_SEEN_OBJ_ID));
if (FL_TEST(obj, FL_SEEN_OBJ_ID)) {
st_data_t val;
if (st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, &val)) {
id = (VALUE)val;
}
else {
rb_bug("rb_gc_impl_object_id: FL_SEEN_OBJ_ID flag set but not found in table");
}
}
else {
RUBY_ASSERT(!FL_TEST(obj, FL_SEEN_OBJ_ID));
RUBY_ASSERT(!st_lookup(objspace->obj_to_id_tbl, (st_data_t)obj, NULL));
id = ULL2NUM(objspace->next_object_id);
objspace->next_object_id += OBJ_ID_INCREMENT;
@ -1126,7 +1131,6 @@ rb_gc_impl_object_id(void *objspace_ptr, VALUE obj)
st_insert(objspace->id_to_obj_tbl, (st_data_t)id, (st_data_t)obj);
FL_SET(obj, FL_SEEN_OBJ_ID);
}
rb_gc_vm_unlock(lev);
return id;
@ -1151,6 +1155,27 @@ rb_gc_impl_object_id_to_ref(void *objspace_ptr, VALUE object_id)
}
}
/*
 * Transfer the object id recorded for `src` to `dest`.
 *
 * Between rb_gc_vm_lock() and rb_gc_vm_unlock():
 *  - the `src` entry is deleted from obj_to_id_tbl (rb_bug() if it is absent);
 *  - FL_SEEN_OBJ_ID is cleared on `src`;
 *  - obj_to_id_tbl gets `dest -> id` and id_to_obj_tbl gets `id -> dest`;
 *  - FL_SEEN_OBJ_ID is set on `dest`.
 */
void
rb_gc_impl_object_id_move(void *objspace_ptr, VALUE dest, VALUE src)
{
/* If the source object's object_id has been seen, we need to update
* the object to object id mapping. */
st_data_t id = 0;
struct objspace *objspace = objspace_ptr;
unsigned int lev = rb_gc_vm_lock();
st_data_t key = (st_data_t)src;
/* st_delete() stores the removed value (the id) into `id`. */
if (!st_delete(objspace->obj_to_id_tbl, &key, &id)) {
rb_bug("gc_move: object ID seen, but not in mapping table: %s", rb_obj_info(src));
}
FL_UNSET_RAW(src, FL_SEEN_OBJ_ID);
st_insert(objspace->obj_to_id_tbl, (st_data_t)dest, id);
st_insert(objspace->id_to_obj_tbl, id, (st_data_t)dest);
FL_SET_RAW(dest, FL_SEEN_OBJ_ID);
rb_gc_vm_unlock(lev);
}
// Forking
void

View File

@ -183,6 +183,7 @@ struct rb_gc_object_metadata_entry {
/* gc.c */
RUBY_ATTR_MALLOC void *ruby_mimmalloc(size_t size);
RUBY_ATTR_MALLOC void *ruby_mimcalloc(size_t num, size_t size);
void rb_gc_ractor_moved(VALUE dest, VALUE src);
void ruby_mimfree(void *ptr);
void rb_gc_prepare_heap(void);
void rb_objspace_set_event_hook(const rb_event_flag_t event);

View File

@ -12,6 +12,7 @@
#include "internal/error.h"
#include "internal/gc.h"
#include "internal/hash.h"
#include "internal/object.h"
#include "internal/ractor.h"
#include "internal/rational.h"
#include "internal/struct.h"
@ -3546,37 +3547,6 @@ rb_obj_traverse_replace(VALUE obj,
}
}
struct RVALUE {
VALUE flags;
VALUE klass;
VALUE v1;
VALUE v2;
VALUE v3;
};
static const VALUE fl_users = FL_USER1 | FL_USER2 | FL_USER3 |
FL_USER4 | FL_USER5 | FL_USER6 | FL_USER7 |
FL_USER8 | FL_USER9 | FL_USER10 | FL_USER11 |
FL_USER12 | FL_USER13 | FL_USER14 | FL_USER15 |
FL_USER16 | FL_USER17 | FL_USER18 | FL_USER19;
static void
ractor_moved_bang(VALUE obj)
{
// invalidate src object
struct RVALUE *rv = (void *)obj;
rv->klass = rb_cRactorMovedObject;
rv->v1 = 0;
rv->v2 = 0;
rv->v3 = 0;
rv->flags = rv->flags & ~fl_users;
if (BUILTIN_TYPE(obj) == T_OBJECT) ROBJECT_SET_SHAPE_ID(obj, ROOT_SHAPE_ID);
// TODO: record moved location
}
static enum obj_traverse_iterator_result
move_enter(VALUE obj, struct obj_traverse_replace_data *data)
{
@ -3585,39 +3555,16 @@ move_enter(VALUE obj, struct obj_traverse_replace_data *data)
return traverse_skip;
}
else {
VALUE moved = rb_obj_alloc(RBASIC_CLASS(obj));
rb_shape_set_shape(moved, rb_shape_get_shape(obj));
data->replacement = moved;
data->replacement = rb_obj_clone(obj);
return traverse_cont;
}
}
void rb_replace_generic_ivar(VALUE clone, VALUE obj); // variable.c
static enum obj_traverse_iterator_result
move_leave(VALUE obj, struct obj_traverse_replace_data *data)
{
VALUE v = data->replacement;
struct RVALUE *dst = (struct RVALUE *)v;
struct RVALUE *src = (struct RVALUE *)obj;
dst->flags = (dst->flags & ~fl_users) | (src->flags & fl_users);
dst->v1 = src->v1;
dst->v2 = src->v2;
dst->v3 = src->v3;
if (UNLIKELY(FL_TEST_RAW(obj, FL_EXIVAR))) {
rb_replace_generic_ivar(v, obj);
}
if (OBJ_FROZEN(obj)) {
OBJ_FREEZE(v);
}
// TODO: generic_ivar
ractor_moved_bang(obj);
rb_gc_ractor_moved(data->replacement, obj);
RBASIC_SET_CLASS_RAW(obj, rb_cRactorMovedObject);
return traverse_cont;
}

View File

@ -2157,27 +2157,6 @@ rb_copy_generic_ivar(VALUE clone, VALUE obj)
}
}
void
rb_replace_generic_ivar(VALUE clone, VALUE obj)
{
RUBY_ASSERT(FL_TEST(obj, FL_EXIVAR));
RB_VM_LOCK_ENTER();
{
st_data_t ivtbl, obj_data = (st_data_t)obj;
if (st_lookup(generic_iv_tbl_, (st_data_t)obj, &ivtbl)) {
st_insert(generic_iv_tbl_, (st_data_t)clone, ivtbl);
st_delete(generic_iv_tbl_, &obj_data, NULL);
}
else {
rb_bug("unreachable");
}
}
RB_VM_LOCK_LEAVE();
FL_SET(clone, FL_EXIVAR);
}
void
rb_ivar_foreach(VALUE obj, rb_ivar_foreach_callback_func *func, st_data_t arg)
{