objspace_dump: Use FILE* to avoid crashing in mark functions

We observed crashes from rb_io_bufwrite() thread switching (through rb_thread_check_ints()) in the middle of rb_execution_context_mark(). By the time rb_execution_context_mark() gets a timeslice again, it read garbage from a frame that was already popped in another thread, crashing the process in SEGV. Other mark functions probably have their own ways of breaking, but clearly, the usual IO code do too much for this perilous pseudo GC context. Use `FILE*` like before 5001cc47169614ea07d87651c95c2ee185e374e0 ("Optimize ObjectSpace.dump_all"). Also, add type checking for the private _dump methods. Co-authored-by: Peter Zhu <peter@peterzhu.ca>
Merged: https://github.com/ruby/ruby/pull/12285
2024-12-06 22:49:53 -05:00 · 2024-12-06 22:49:53 -05:00 · 476d655053 · 2024-12-09 21:08:53 +00:00
commit 476d655053
parent de7feb0538
1 changed files with 29 additions and 10 deletions
--- a/ext/objspace/objspace_dump.c
+++ b/ext/objspace/objspace_dump.c
@ -36,9 +36,10 @@ RUBY_EXTERN const char ruby_hexdigits[];
 #define BUFFER_CAPACITY 4096

 struct dump_config {
-    VALUE type;
-    VALUE stream;
+    VALUE given_output;
+    VALUE output_io;
    VALUE string;
+    FILE *stream;
    const char *root_category;
    VALUE cur_obj;
    VALUE cur_obj_klass;
@ -58,7 +59,7 @@ dump_flush(struct dump_config *dc)
 {
    if (dc->buffer_len) {
        if (dc->stream) {
-            size_t written = rb_io_bufwrite(dc->stream, dc->buffer, dc->buffer_len);
+            size_t written = fwrite(dc->buffer, sizeof(dc->buffer[0]), dc->buffer_len, dc->stream);
            if (written < dc->buffer_len) {
                MEMMOVE(dc->buffer, dc->buffer + written, char, dc->buffer_len - written);
                dc->buffer_len -= written;
@ -696,16 +697,34 @@ root_obj_i(const char *category, VALUE obj, void *data)
 static void
 dump_output(struct dump_config *dc, VALUE output, VALUE full, VALUE since, VALUE shapes)
 {
-
+    dc->given_output = output;
    dc->full_heap = 0;
    dc->buffer_len = 0;

    if (TYPE(output) == T_STRING) {
-        dc->stream = Qfalse;
+        dc->stream = NULL;
        dc->string = output;
    }
    else {
-        dc->stream = output;
+        rb_io_t *fptr;
+        // Output should be an IO, typecheck and get a FILE* for writing.
+        // We cannot write with the usual IO code here because writes
+        // interleave with calls to rb_gc_mark(). The usual IO code can
+        // cause a thread switch, raise exceptions, and even run arbitrary
+        // ruby code through the fiber scheduler.
+        //
+        // Mark functions generally can't handle these possibilities so
+        // the usual IO code is unsafe in this context. (For example,
+        // there are many ways to crash when ruby code runs and mutates
+        // the execution context while rb_execution_context_mark() is in
+        // progress.)
+        //
+        // Using FILE* isn't perfect, but it avoids the most acute problems.
+        output = rb_io_get_io(output);
+        dc->output_io = rb_io_get_write_io(output);
+        rb_io_flush(dc->output_io);
+        GetOpenFile(dc->output_io, fptr);
+        dc->stream = rb_io_stdio_file(fptr);
        dc->string = Qfalse;
    }

@ -729,13 +748,13 @@ dump_result(struct dump_config *dc)
 {
    dump_flush(dc);

+    if (dc->stream) {
+        fflush(dc->stream);
+    }
    if (dc->string) {
        return dc->string;
    }
-    else {
-        rb_io_flush(dc->stream);
-        return dc->stream;
-    }
+    return dc->given_output;
 }

 /* :nodoc: */