From 476d655053b0e3ea447dc6549b821d18636c6603 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Fri, 6 Dec 2024 22:49:53 -0500 Subject: [PATCH] objspace_dump: Use FILE* to avoid crashing in mark functions We observed crashes from rb_io_bufwrite() thread switching (through rb_thread_check_ints()) in the middle of rb_execution_context_mark(). By the time rb_execution_context_mark() gets a timeslice again, it read garbage from a frame that was already popped in another thread, crashing the process in SEGV. Other mark functions probably have their own ways of breaking, but clearly, the usual IO code do too much for this perilous pseudo GC context. Use `FILE*` like before 5001cc47169614ea07d87651c95c2ee185e374e0 ("Optimize ObjectSpace.dump_all"). Also, add type checking for the private _dump methods. Co-authored-by: Peter Zhu --- ext/objspace/objspace_dump.c | 39 +++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/ext/objspace/objspace_dump.c b/ext/objspace/objspace_dump.c index b1f7ab9804..1bcb4033cb 100644 --- a/ext/objspace/objspace_dump.c +++ b/ext/objspace/objspace_dump.c @@ -36,9 +36,10 @@ RUBY_EXTERN const char ruby_hexdigits[]; #define BUFFER_CAPACITY 4096 struct dump_config { - VALUE type; - VALUE stream; + VALUE given_output; + VALUE output_io; VALUE string; + FILE *stream; const char *root_category; VALUE cur_obj; VALUE cur_obj_klass; @@ -58,7 +59,7 @@ dump_flush(struct dump_config *dc) { if (dc->buffer_len) { if (dc->stream) { - size_t written = rb_io_bufwrite(dc->stream, dc->buffer, dc->buffer_len); + size_t written = fwrite(dc->buffer, sizeof(dc->buffer[0]), dc->buffer_len, dc->stream); if (written < dc->buffer_len) { MEMMOVE(dc->buffer, dc->buffer + written, char, dc->buffer_len - written); dc->buffer_len -= written; @@ -696,16 +697,34 @@ root_obj_i(const char *category, VALUE obj, void *data) static void dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since, VALUE shapes) { - + dc->given_output = output; dc->full_heap = 0; dc->buffer_len = 0; if (TYPE(output) == T_STRING) { - dc->stream = Qfalse; + dc->stream = NULL; dc->string = output; } else { - dc->stream = output; + rb_io_t *fptr; + // Output should be an IO, typecheck and get a FILE* for writing. + // We cannot write with the usual IO code here because writes + // interleave with calls to rb_gc_mark(). The usual IO code can + // cause a thread switch, raise exceptions, and even run arbitrary + // ruby code through the fiber scheduler. + // + // Mark functions generally can't handle these possibilities so + // the usual IO code is unsafe in this context. (For example, + // there are many ways to crash when ruby code runs and mutates + // the execution context while rb_execution_context_mark() is in + // progress.) + // + // Using FILE* isn't perfect, but it avoids the most acute problems. + output = rb_io_get_io(output); + dc->output_io = rb_io_get_write_io(output); + rb_io_flush(dc->output_io); + GetOpenFile(dc->output_io, fptr); + dc->stream = rb_io_stdio_file(fptr); dc->string = Qfalse; } @@ -729,13 +748,13 @@ dump_result(struct dump_config *dc) { dump_flush(dc); + if (dc->stream) { + fflush(dc->stream); + } if (dc->string) { return dc->string; } - else { - rb_io_flush(dc->stream); - return dc->stream; - } + return dc->given_output; } /* :nodoc: */