objspace_dump: Use FILE* to avoid crashing in mark functions

We observed crashes from rb_io_bufwrite() thread switching (through
rb_thread_check_ints()) in the middle of rb_execution_context_mark(). By
the time rb_execution_context_mark() gets a timeslice again, it read
garbage from a frame that was already popped in another thread, crashing
the process in SEGV. Other mark functions probably have their own ways
of breaking, but clearly, the usual IO code do too much for this
perilous pseudo GC context.

Use `FILE*` like before 5001cc47169614ea07d87651c95c2ee185e374e0
("Optimize ObjectSpace.dump_all"). Also, add type checking for
the private _dump methods.

Co-authored-by: Peter Zhu <peter@peterzhu.ca>
This commit is contained in:
Alan Wu 2024-12-06 22:49:53 -05:00
parent de7feb0538
commit 476d655053
Notes: git 2024-12-09 21:08:53 +00:00

View File

@ -36,9 +36,10 @@ RUBY_EXTERN const char ruby_hexdigits[];
#define BUFFER_CAPACITY 4096
struct dump_config {
VALUE type;
VALUE stream;
VALUE given_output;
VALUE output_io;
VALUE string;
FILE *stream;
const char *root_category;
VALUE cur_obj;
VALUE cur_obj_klass;
@ -58,7 +59,7 @@ dump_flush(struct dump_config *dc)
{
if (dc->buffer_len) {
if (dc->stream) {
size_t written = rb_io_bufwrite(dc->stream, dc->buffer, dc->buffer_len);
size_t written = fwrite(dc->buffer, sizeof(dc->buffer[0]), dc->buffer_len, dc->stream);
if (written < dc->buffer_len) {
MEMMOVE(dc->buffer, dc->buffer + written, char, dc->buffer_len - written);
dc->buffer_len -= written;
@ -696,16 +697,34 @@ root_obj_i(const char *category, VALUE obj, void *data)
static void
dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since, VALUE shapes)
{
dc->given_output = output;
dc->full_heap = 0;
dc->buffer_len = 0;
if (TYPE(output) == T_STRING) {
dc->stream = Qfalse;
dc->stream = NULL;
dc->string = output;
}
else {
dc->stream = output;
rb_io_t *fptr;
// Output should be an IO, typecheck and get a FILE* for writing.
// We cannot write with the usual IO code here because writes
// interleave with calls to rb_gc_mark(). The usual IO code can
// cause a thread switch, raise exceptions, and even run arbitrary
// ruby code through the fiber scheduler.
//
// Mark functions generally can't handle these possibilities so
// the usual IO code is unsafe in this context. (For example,
// there are many ways to crash when ruby code runs and mutates
// the execution context while rb_execution_context_mark() is in
// progress.)
//
// Using FILE* isn't perfect, but it avoids the most acute problems.
output = rb_io_get_io(output);
dc->output_io = rb_io_get_write_io(output);
rb_io_flush(dc->output_io);
GetOpenFile(dc->output_io, fptr);
dc->stream = rb_io_stdio_file(fptr);
dc->string = Qfalse;
}
@ -729,13 +748,13 @@ dump_result(struct dump_config *dc)
{
dump_flush(dc);
if (dc->stream) {
fflush(dc->stream);
}
if (dc->string) {
return dc->string;
}
else {
rb_io_flush(dc->stream);
return dc->stream;
}
return dc->given_output;
}
/* :nodoc: */