ObjectSpace.dump_all: dump shapes as well

I see several arguments in doing so.

First, they use a non-trivial amount of memory, so for various memory
profiling/mapping tools it is relevant to have visibility of the space
occupied by shapes.

Then, some pathological code can create tons of shapes, so it is
valuable to have a way to observe shapes without having
to compile Ruby with `SHAPE_DEBUG=1`.

And additionally, it's likely much faster to dump them this way than
to use `RubyVM::Shape`.

There are however a few open questions:

- Shapes can't respect the `since:` argument. Not sure what to do when
  it is provided. Would probably make sense to not dump them.
- Maybe it would make more sense to have a separate `ObjectSpace.dump_shapes`?
- Maybe instead `dump_all` should take a `shapes: false` argument?

Additionally, `ObjectSpace.dump_shapes` is added for the use case of
debugging the evolution of the shape tree.
This commit is contained in:
Jean Boussier 2022-12-06 12:56:51 +01:00 committed by Jean Boussier
parent b19490f75d
commit 73771e4b19
Notes: git 2022-12-08 17:46:35 +00:00
7 changed files with 248 additions and 40 deletions

View File

@ -14217,6 +14217,7 @@ shape.$(OBJEXT): {$(VPATH)}shape.c
shape.$(OBJEXT): {$(VPATH)}shape.h shape.$(OBJEXT): {$(VPATH)}shape.h
shape.$(OBJEXT): {$(VPATH)}st.h shape.$(OBJEXT): {$(VPATH)}st.h
shape.$(OBJEXT): {$(VPATH)}subst.h shape.$(OBJEXT): {$(VPATH)}subst.h
shape.$(OBJEXT): {$(VPATH)}symbol.h
shape.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h shape.$(OBJEXT): {$(VPATH)}thread_$(THREAD_MODEL).h
shape.$(OBJEXT): {$(VPATH)}thread_native.h shape.$(OBJEXT): {$(VPATH)}thread_native.h
shape.$(OBJEXT): {$(VPATH)}variable.h shape.$(OBJEXT): {$(VPATH)}variable.h

View File

@ -556,6 +556,7 @@ objspace_dump.o: $(top_srcdir)/node.h
objspace_dump.o: $(top_srcdir)/ruby_assert.h objspace_dump.o: $(top_srcdir)/ruby_assert.h
objspace_dump.o: $(top_srcdir)/ruby_atomic.h objspace_dump.o: $(top_srcdir)/ruby_atomic.h
objspace_dump.o: $(top_srcdir)/shape.h objspace_dump.o: $(top_srcdir)/shape.h
objspace_dump.o: $(top_srcdir)/symbol.h
objspace_dump.o: $(top_srcdir)/thread_pthread.h objspace_dump.o: $(top_srcdir)/thread_pthread.h
objspace_dump.o: $(top_srcdir)/vm_core.h objspace_dump.o: $(top_srcdir)/vm_core.h
objspace_dump.o: $(top_srcdir)/vm_opts.h objspace_dump.o: $(top_srcdir)/vm_opts.h

View File

@ -6,6 +6,7 @@ module ObjectSpace
class << self class << self
private :_dump private :_dump
private :_dump_all private :_dump_all
private :_dump_shapes
end end
module_function module_function
@ -53,23 +54,38 @@ module ObjectSpace
# #
# Dump the contents of the ruby heap as JSON. # Dump the contents of the ruby heap as JSON.
# #
# _full_ must be a boolean. If true, all heap slots are dumped including the empty ones (T_NONE).
#
# _since_ must be a non-negative integer or +nil+. # _since_ must be a non-negative integer or +nil+.
# #
# If _since_ is a positive integer, only objects of that generation and # If _since_ is a positive integer, only objects of that generation and
# newer generations are dumped. The current generation can be accessed using # newer generations are dumped. The current generation can be accessed using
# GC::count. # GC::count. Objects that were allocated without object allocation tracing enabled
#
# Objects that were allocated without object allocation tracing enabled
# are ignored. See ::trace_object_allocations for more information and # are ignored. See ::trace_object_allocations for more information and
# examples. # examples.
# #
# If _since_ is omitted or is +nil+, all objects are dumped. # If _since_ is omitted or is +nil+, all objects are dumped.
# #
# _shapes_ must be a boolean or a non-negative integer.
#
# If _shapes_ is a positive integer, only shapes newer than the provided
# shape id are dumped. The current shape_id can be accessed using +RubyVM.stat(:next_shape_id)+.
#
# If _shapes_ is +false+, no shapes are dumped.
#
# To only dump objects allocated past a certain point you can combine _since_ and _shapes_:
# ObjectSpace.trace_object_allocations
# GC.start
# gc_generation = GC.count
# shape_generation = RubyVM.stat(:next_shape_id)
# call_method_to_instrument
# ObjectSpace.dump_all(since: gc_generation, shapes: shape_generation)
#
# This method is only expected to work with C Ruby. # This method is only expected to work with C Ruby.
# This is an experimental method and is subject to change. # This is an experimental method and is subject to change.
# In particular, the function signature and output format are # In particular, the function signature and output format are
# not guaranteed to be compatible in future versions of ruby. # not guaranteed to be compatible in future versions of ruby.
def dump_all(output: :file, full: false, since: nil) def dump_all(output: :file, full: false, since: nil, shapes: true)
out = case output out = case output
when :file, nil when :file, nil
require 'tempfile' require 'tempfile'
@ -84,7 +100,46 @@ module ObjectSpace
raise ArgumentError, "wrong output option: #{output.inspect}" raise ArgumentError, "wrong output option: #{output.inspect}"
end end
ret = _dump_all(out, full, since) shapes = 0 if shapes == true
ret = _dump_all(out, full, since, shapes)
return nil if output == :stdout
ret
end
# call-seq:
# ObjectSpace.dump_shapes([output: :file]) # => #<File:/tmp/rubyshapes20131125-88469-laoj3v.json>
# ObjectSpace.dump_shapes(output: :stdout) # => nil
# ObjectSpace.dump_shapes(output: :string) # => "{...}\n{...}\n..."
# ObjectSpace.dump_shapes(output:
# File.open('shapes.json','w')) # => #<File:shapes.json>
# ObjectSpace.dump_shapes(output: :string,
# since: 42) # => "{...}\n{...}\n..."
#
# Dump the contents of the ruby shape tree as JSON.
#
# If _since_ is a positive integer, only shapes newer than the provided
# shape id are dumped. The current shape_id can be accessed using +RubyVM.stat(:next_shape_id)+.
#
# This method is only expected to work with C Ruby.
# This is an experimental method and is subject to change.
# In particular, the function signature and output format are
# not guaranteed to be compatible in future versions of ruby.
def dump_shapes(output: :file, since: 0)
out = case output
when :file, nil
require 'tempfile'
Tempfile.create(%w(rubyshapes .json))
when :stdout
STDOUT
when :string
+''
when IO
output
else
raise ArgumentError, "wrong output option: #{output.inspect}"
end
ret = _dump_shapes(out, since)
return nil if output == :stdout return nil if output == :stdout
ret ret
end end

View File

@ -18,6 +18,8 @@
#include "internal/hash.h" #include "internal/hash.h"
#include "internal/string.h" #include "internal/string.h"
#include "internal/sanitizers.h" #include "internal/sanitizers.h"
#include "symbol.h"
#include "shape.h"
#include "node.h" #include "node.h"
#include "objspace.h" #include "objspace.h"
#include "ruby/debug.h" #include "ruby/debug.h"
@ -42,6 +44,7 @@ struct dump_config {
unsigned int full_heap: 1; unsigned int full_heap: 1;
unsigned int partial_dump; unsigned int partial_dump;
size_t since; size_t since;
size_t shapes_since;
unsigned long buffer_len; unsigned long buffer_len;
char buffer[BUFFER_CAPACITY]; char buffer[BUFFER_CAPACITY];
}; };
@ -350,6 +353,20 @@ dump_append_string_content(struct dump_config *dc, VALUE obj)
} }
} }
/*
 * Append a JSON value describing +id+ to the dump buffer.
 *
 * IDs for which is_instance_id() is true are written through
 * dump_append_string_value() using the string of the matching symbol.
 * Every other ID is written as the quoted text ID_INTERNAL(<serial>),
 * where <serial> comes from rb_id_to_serial().
 */
static inline void
dump_append_id(struct dump_config *dc, ID id)
{
    if (!is_instance_id(id)) {
        /* Not an instance-variable name: fall back to the numeric serial. */
        dump_append(dc, "\"ID_INTERNAL(");
        dump_append_sizet(dc, rb_id_to_serial(id));
        dump_append(dc, ")\"");
        return;
    }

    dump_append_string_value(dc, rb_sym2str(ID2SYM(id)));
}
static void static void
dump_object(VALUE obj, struct dump_config *dc) dump_object(VALUE obj, struct dump_config *dc)
{ {
@ -378,13 +395,16 @@ dump_object(VALUE obj, struct dump_config *dc)
dump_append(dc, "{\"address\":"); dump_append(dc, "{\"address\":");
dump_append_ref(dc, obj); dump_append_ref(dc, obj);
dump_append(dc, ", \"shape_id\":");
dump_append_sizet(dc, rb_shape_get_shape_id(obj));
dump_append(dc, ", \"type\":\""); dump_append(dc, ", \"type\":\"");
dump_append(dc, obj_type(obj)); dump_append(dc, obj_type(obj));
dump_append(dc, "\""); dump_append(dc, "\"");
size_t shape_id = rb_shape_get_shape_id(obj);
if (shape_id) {
dump_append(dc, ", \"shape_id\":");
dump_append_sizet(dc, shape_id);
}
dump_append(dc, ", \"slot_size\":"); dump_append(dc, ", \"slot_size\":");
dump_append_sizet(dc, dc->cur_page_slot_size); dump_append_sizet(dc, dc->cur_page_slot_size);
@ -622,7 +642,7 @@ root_obj_i(const char *category, VALUE obj, void *data)
} }
static void static void
dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since) dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since, VALUE shapes)
{ {
dc->full_heap = 0; dc->full_heap = 0;
@ -648,6 +668,8 @@ dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since)
else { else {
dc->partial_dump = 0; dc->partial_dump = 0;
} }
dc->shapes_since = RTEST(shapes) ? NUM2SIZET(shapes) : 0;
} }
static VALUE static VALUE
@ -672,18 +694,81 @@ objspace_dump(VALUE os, VALUE obj, VALUE output)
dc.cur_page_slot_size = rb_gc_obj_slot_size(obj); dc.cur_page_slot_size = rb_gc_obj_slot_size(obj);
} }
dump_output(&dc, output, Qnil, Qnil); dump_output(&dc, output, Qnil, Qnil, Qnil);
dump_object(obj, &dc); dump_object(obj, &dc);
return dump_result(&dc); return dump_result(&dc);
} }
/*
 * Callback handed to rb_shape_each_shape(): emits one JSON line for +shape+
 * into the dump buffer received through +data+ (a struct dump_config *).
 *
 * Shapes whose id is below dc->shapes_since are skipped entirely.
 * Each emitted line carries address, type ("SHAPE"), id, parent_id
 * (non-root shapes only), depth, shape_type, edges and memsize; IVAR
 * shapes additionally carry edge_name and CAPACITY_CHANGE shapes carry
 * capacity.
 */
static void
shape_i(rb_shape_t *shape, void *data)
{
    struct dump_config *dc = (struct dump_config *)data;
    const size_t id = rb_shape_id(shape);

    /* Lower bound requested by the caller: ids strictly below it are not dumped. */
    if (id < dc->shapes_since) return;

    dump_append(dc, "{\"address\":");
    dump_append_ref(dc, (VALUE)shape);

    dump_append(dc, ", \"type\":\"SHAPE\", \"id\":");
    dump_append_sizet(dc, id);

    /* parent_id is only written for shapes that are not of type SHAPE_ROOT. */
    if (shape->type != SHAPE_ROOT) {
        dump_append(dc, ", \"parent_id\":");
        dump_append_lu(dc, shape->parent_id);
    }

    dump_append(dc, ", \"depth\":");
    dump_append_sizet(dc, rb_shape_depth(shape));

    dump_append(dc, ", \"shape_type\":");
    switch (shape->type) {
      case SHAPE_ROOT:
        dump_append(dc, "\"ROOT\"");
        break;

      case SHAPE_IVAR:
        dump_append(dc, "\"IVAR\"");
        /* IVAR transitions also record the name attached to the edge. */
        dump_append(dc, ",\"edge_name\":");
        dump_append_id(dc, shape->edge_name);
        break;

      case SHAPE_FROZEN:
        dump_append(dc, "\"FROZEN\"");
        break;

      case SHAPE_CAPACITY_CHANGE:
        dump_append(dc, "\"CAPACITY_CHANGE\"");
        dump_append(dc, ", \"capacity\":");
        dump_append_sizet(dc, shape->capacity);
        break;

      case SHAPE_INITIAL_CAPACITY:
        dump_append(dc, "\"INITIAL_CAPACITY\"");
        break;

      case SHAPE_T_OBJECT:
        dump_append(dc, "\"T_OBJECT\"");
        break;

      default:
        /* A shape type this dumper does not know about is treated as an interpreter bug. */
        rb_bug("[objspace] unexpected shape type");
    }

    dump_append(dc, ", \"edges\":");
    dump_append_sizet(dc, rb_shape_edges_count(shape));

    dump_append(dc, ", \"memsize\":");
    dump_append_sizet(dc, rb_shape_memsize(shape));

    dump_append(dc, "}\n");
}
static VALUE static VALUE
objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since) objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since, VALUE shapes)
{ {
struct dump_config dc = {0,}; struct dump_config dc = {0,};
dump_output(&dc, output, full, since); dump_output(&dc, output, full, since, shapes);
if (!dc.partial_dump || dc.since == 0) { if (!dc.partial_dump || dc.since == 0) {
/* dump roots */ /* dump roots */
@ -691,12 +776,28 @@ objspace_dump_all(VALUE os, VALUE output, VALUE full, VALUE since)
if (dc.roots) dump_append(&dc, "]}\n"); if (dc.roots) dump_append(&dc, "]}\n");
} }
if (RTEST(shapes)) {
rb_shape_each_shape(shape_i, &dc);
}
/* dump all objects */ /* dump all objects */
rb_objspace_each_objects(heap_i, &dc); rb_objspace_each_objects(heap_i, &dc);
return dump_result(&dc); return dump_result(&dc);
} }
/*
 * Implementation of ObjectSpace._dump_shapes(output, shapes).
 *
 * Sets up a dump_config for +output+ (no full-heap flag, no +since+
 * generation), walks every shape through shape_i when +shapes+ is truthy,
 * and returns whatever dump_result() produces for that configuration.
 */
static VALUE
objspace_dump_shapes(VALUE os, VALUE output, VALUE shapes)
{
    struct dump_config config = {0,};

    dump_output(&config, output, Qfalse, Qnil, shapes);

    /* nil/false disables the walk; any other value (including 0) enables it. */
    if (RTEST(shapes)) {
        rb_shape_each_shape(shape_i, &config);
    }

    return dump_result(&config);
}
void void
Init_objspace_dump(VALUE rb_mObjSpace) Init_objspace_dump(VALUE rb_mObjSpace)
{ {
@ -706,7 +807,8 @@ Init_objspace_dump(VALUE rb_mObjSpace)
#endif #endif
rb_define_module_function(rb_mObjSpace, "_dump", objspace_dump, 2); rb_define_module_function(rb_mObjSpace, "_dump", objspace_dump, 2);
rb_define_module_function(rb_mObjSpace, "_dump_all", objspace_dump_all, 3); rb_define_module_function(rb_mObjSpace, "_dump_all", objspace_dump_all, 4);
rb_define_module_function(rb_mObjSpace, "_dump_shapes", objspace_dump_shapes, 2);
/* force create static IDs */ /* force create static IDs */
rb_obj_gc_flags(rb_mObjSpace, 0, 0); rb_obj_gc_flags(rb_mObjSpace, 0, 0);

79
shape.c
View File

@ -2,6 +2,8 @@
#include "vm_sync.h" #include "vm_sync.h"
#include "shape.h" #include "shape.h"
#include "gc.h" #include "gc.h"
#include "symbol.h"
#include "id_table.h"
#include "internal/class.h" #include "internal/class.h"
#include "internal/symbol.h" #include "internal/symbol.h"
#include "internal/variable.h" #include "internal/variable.h"
@ -37,6 +39,17 @@ rb_shape_root_shape_p(rb_shape_t* shape)
return shape == rb_shape_get_root_shape(); return shape == rb_shape_get_root_shape();
} }
/*
 * Invoke +callback+ once for every shape, passing +data+ through untouched.
 *
 * Iteration starts at the root shape and advances one rb_shape_t at a time
 * until it reaches the slot returned for the VM's next_shape_id, which is
 * excluded.
 * NOTE(review): this relies on shapes being laid out contiguously in id
 * order, which the pointer arithmetic implies; confirm against the allocator.
 */
void
rb_shape_each_shape(each_shape_callback callback, void *data)
{
    rb_shape_t *shape = rb_shape_get_root_shape();
    rb_shape_t *const limit = rb_shape_get_shape_by_id(GET_VM()->next_shape_id);

    for (; shape < limit; shape++) {
        callback(shape, data);
    }
}
rb_shape_t* rb_shape_t*
rb_shape_get_shape_by_id(shape_id_t shape_id) rb_shape_get_shape_by_id(shape_id_t shape_id)
{ {
@ -97,10 +110,10 @@ rb_shape_get_shape_id(VALUE obj)
#endif #endif
} }
unsigned int size_t
rb_shape_depth(rb_shape_t * shape) rb_shape_depth(rb_shape_t * shape)
{ {
unsigned int depth = 1; size_t depth = 1;
while (shape->parent_id != INVALID_SHAPE_ID) { while (shape->parent_id != INVALID_SHAPE_ID) {
depth++; depth++;
@ -285,6 +298,7 @@ rb_shape_transition_shape_frozen(VALUE obj)
rb_shape_t * rb_shape_t *
rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id) rb_shape_get_next_iv_shape(rb_shape_t* shape, ID id)
{ {
RUBY_ASSERT(!is_instance_id(id) || RTEST(rb_sym2str(ID2SYM(id))));
return get_next_shape_internal(shape, id, SHAPE_IVAR); return get_next_shape_internal(shape, id, SHAPE_IVAR);
} }
@ -428,16 +442,45 @@ rb_shape_rebuild_shape(rb_shape_t * initial_shape, rb_shape_t * dest_shape)
return midway_shape; return midway_shape;
} }
/*
 * Number of entries in +shape+'s edges table as reported by
 * rb_id_table_size(), or 0 when the shape has no edges table.
 */
size_t
rb_shape_edges_count(rb_shape_t *shape)
{
    return shape->edges ? rb_id_table_size(shape->edges) : 0;
}
/*
 * Memory attributed to +shape+: sizeof(rb_shape_t) plus, when an edges
 * table is present, the size reported by rb_id_table_memsize() for it.
 */
size_t
rb_shape_memsize(rb_shape_t *shape)
{
    size_t edges_size = shape->edges ? rb_id_table_memsize(shape->edges) : 0;

    return sizeof(rb_shape_t) + edges_size;
}
#if SHAPE_DEBUG #if SHAPE_DEBUG
VALUE rb_cShape; VALUE rb_cShape;
static size_t
shape_memsize(const void *shape_ptr)
{
return rb_shape_memsize((rb_shape_t *)shape_ptr);
}
/* /*
* Exposing Shape to Ruby via RubyVM.debug_shape * Exposing Shape to Ruby via RubyVM.debug_shape
*/ */
static const rb_data_type_t shape_data_type = { static const rb_data_type_t shape_data_type = {
"Shape", .wrap_struct_name = "Shape",
{NULL, NULL, NULL,}, .function = {
0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED .dmark = NULL,
.dfree = NULL,
.dsize = shape_memsize,
},
.flags = RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
}; };
static VALUE static VALUE
@ -480,12 +523,10 @@ rb_shape_parent_id(VALUE self)
static VALUE static VALUE
parse_key(ID key) parse_key(ID key)
{ {
if ((key & RUBY_ID_INTERNAL) == RUBY_ID_INTERNAL) { if (is_instance_id(key)) {
return LONG2NUM(key);
}
else {
return ID2SYM(key); return ID2SYM(key);
} }
return LONG2NUM(key);
} }
static VALUE static VALUE
@ -527,17 +568,13 @@ rb_shape_edge_name(VALUE self)
rb_shape_t* shape; rb_shape_t* shape;
TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
if ((shape->edge_name & (ID_INTERNAL)) == ID_INTERNAL) { if (shape->edge_name) {
return INT2NUM(shape->capacity); if (is_instance_id(shape->edge_name)) {
}
else {
if (shape->edge_name) {
return ID2SYM(shape->edge_name); return ID2SYM(shape->edge_name);
} }
else { return INT2NUM(shape->capacity);
return Qnil;
}
} }
return Qnil;
} }
static VALUE static VALUE
@ -563,13 +600,7 @@ rb_shape_export_depth(VALUE self)
{ {
rb_shape_t* shape; rb_shape_t* shape;
TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape); TypedData_Get_Struct(self, rb_shape_t, &shape_data_type, shape);
return SIZET2NUM(rb_shape_depth(shape));
unsigned int depth = 0;
while (shape->parent_id != INVALID_SHAPE_ID) {
depth++;
shape = rb_shape_get_parent(shape);
}
return INT2NUM(depth);
} }
static VALUE static VALUE

View File

@ -124,7 +124,6 @@ bool rb_shape_root_shape_p(rb_shape_t* shape);
rb_shape_t * rb_shape_get_root_shape(void); rb_shape_t * rb_shape_get_root_shape(void);
uint8_t rb_shape_id_num_bits(void); uint8_t rb_shape_id_num_bits(void);
int32_t rb_shape_id_offset(void); int32_t rb_shape_id_offset(void);
unsigned int rb_shape_depth(rb_shape_t * shape);
rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id); rb_shape_t* rb_shape_get_shape_by_id_without_assertion(shape_id_t shape_id);
rb_shape_t * rb_shape_get_parent(rb_shape_t * shape); rb_shape_t * rb_shape_get_parent(rb_shape_t * shape);
@ -184,4 +183,12 @@ bool rb_shape_set_shape_id(VALUE obj, shape_id_t shape_id);
VALUE rb_obj_debug_shape(VALUE self, VALUE obj); VALUE rb_obj_debug_shape(VALUE self, VALUE obj);
VALUE rb_shape_flags_mask(void); VALUE rb_shape_flags_mask(void);
RUBY_SYMBOL_EXPORT_BEGIN
typedef void each_shape_callback(rb_shape_t * shape, void *data);
void rb_shape_each_shape(each_shape_callback callback, void *data);
size_t rb_shape_memsize(rb_shape_t *shape);
size_t rb_shape_edges_count(rb_shape_t *shape);
size_t rb_shape_depth(rb_shape_t *shape);
RUBY_SYMBOL_EXPORT_END
#endif #endif

View File

@ -414,7 +414,7 @@ class TestObjSpace < Test::Unit::TestCase
@obj1 = Object.new @obj1 = Object.new
GC.start GC.start
@obj2 = Object.new @obj2 = Object.new
ObjectSpace.dump_all(output: :stdout, since: gc_gen) ObjectSpace.dump_all(output: :stdout, since: gc_gen, shapes: false)
end end
p dump_my_heap_please p dump_my_heap_please
@ -422,7 +422,7 @@ class TestObjSpace < Test::Unit::TestCase
assert_equal 'nil', output.pop assert_equal 'nil', output.pop
since = output.shift.to_i since = output.shift.to_i
assert_operator output.size, :>, 0 assert_operator output.size, :>, 0
generations = output.map { |l| JSON.parse(l)["generation"] }.uniq.sort generations = output.map { |l| JSON.parse(l) }.map { |o| o["generation"] }.uniq.sort
assert_equal [since, since + 1], generations assert_equal [since, since + 1], generations
end end
end end
@ -479,6 +479,7 @@ class TestObjSpace < Test::Unit::TestCase
output.each { |l| output.each { |l|
obj = JSON.parse(l) obj = JSON.parse(l)
next if obj["type"] == "ROOT" next if obj["type"] == "ROOT"
next if obj["type"] == "SHAPE"
assert_not_nil obj["slot_size"] assert_not_nil obj["slot_size"]
assert_equal 0, obj["slot_size"] % GC::INTERNAL_CONSTANTS[:RVALUE_SIZE] assert_equal 0, obj["slot_size"] % GC::INTERNAL_CONSTANTS[:RVALUE_SIZE]
@ -794,6 +795,16 @@ class TestObjSpace < Test::Unit::TestCase
assert_equal name, JSON.parse(dump)["method"], dump assert_equal name, JSON.parse(dump)["method"], dump
end end
# Every line produced by ObjectSpace.dump_shapes must be a SHAPE entry, and
# the +since+ keyword must bound which shape ids are included: starting at
# next_shape_id yields nothing, starting two ids earlier yields two lines.
def test_dump_shapes
  ObjectSpace.dump_shapes(output: :string).each_line do |entry|
    assert_include(entry, '"type":"SHAPE"')
  end

  # next_shape_id is re-read for each call so it reflects the VM state at that point.
  nothing_new = ObjectSpace.dump_shapes(output: :string, since: RubyVM.stat(:next_shape_id))
  assert_empty(nothing_new)

  last_two = ObjectSpace.dump_shapes(output: :string, since: RubyVM.stat(:next_shape_id) - 2)
  assert_equal(2, last_two.lines.size)
end
private private
def utf8_°° def utf8_°°