[ruby/psych] Guard from memory leak in Psych::Emitter#start_document

When an exception is raised, it can leak memory in `head`. There are two
places that can leak memory:

1. `Check_Type(tuple, T_ARRAY)` can leak memory if `tuple` is not an
   array.
2. `StringValue(name)` and `StringValue(value)` if they are not strings
   and the call to `to_str` does not return a string.

This commit fixes these memory leaks by wrapping the code around a
rb_ensure so that the memory is freed in all cases.

The following code demonstrates the memory leak:

    emitter = Psych::Emitter.new(StringIO.new)
    nil_to_string_tags = [[nil, "tag:TALOS"]] + ([1] * 1000)
    expected_array_tags = [1] * 1000

    10.times do
      1_000.times do
        # Raises `no implicit conversion of nil into String`
        emitter.start_document([], nil_to_string_tags, 0)
      rescue TypeError
      end

      1_000.times do
        # Raises `wrong argument type Integer (expected Array)`
        emitter.start_document([], expected_array_tags, 0)
      rescue TypeError
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    47248
    79728
    111968
    144224
    176480
    208896
    241104
    273280
    305472
    337664

After:

    14832
    15088
    15344
    15344
    15360
    15632
    15632
    15632
    15648
    15648

https://github.com/ruby/psych/commit/053af73818
This commit is contained in:
Peter Zhu 2024-08-09 16:17:36 -04:00 committed by git
parent 4e85b6b4c4
commit 7b7dde37f5

View File

@ -136,23 +136,29 @@ static VALUE end_stream(VALUE self)
return self;
}
/* call-seq: emitter.start_document(version, tags, implicit)
*
* Start a document emission with YAML +version+, +tags+, and an +implicit+
* start.
*
* See Psych::Handler#start_document
*/
static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp)
struct start_document_data {
VALUE self;
VALUE version;
VALUE tags;
VALUE imp;
yaml_tag_directive_t * head;
};
static VALUE start_document_try(VALUE d)
{
struct start_document_data * data = (struct start_document_data *)d;
VALUE self = data->self;
VALUE version = data->version;
VALUE tags = data->tags;
VALUE imp = data->imp;
yaml_emitter_t * emitter;
yaml_tag_directive_t * head = NULL;
yaml_tag_directive_t * tail = NULL;
yaml_event_t event;
yaml_version_directive_t version_directive;
TypedData_Get_Struct(self, yaml_emitter_t, &psych_emitter_type, emitter);
Check_Type(version, T_ARRAY);
if(RARRAY_LEN(version) > 0) {
@ -171,8 +177,8 @@ static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp)
Check_Type(tags, T_ARRAY);
len = RARRAY_LEN(tags);
head = xcalloc((size_t)len, sizeof(yaml_tag_directive_t));
tail = head;
data->head = xcalloc((size_t)len, sizeof(yaml_tag_directive_t));
tail = data->head;
for(i = 0; i < len && i < RARRAY_LEN(tags); i++) {
VALUE tuple = RARRAY_AREF(tags, i);
@ -182,9 +188,9 @@ static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp)
Check_Type(tuple, T_ARRAY);
if(RARRAY_LEN(tuple) < 2) {
xfree(head);
rb_raise(rb_eRuntimeError, "tag tuple must be of length 2");
}
name = RARRAY_AREF(tuple, 0);
value = RARRAY_AREF(tuple, 1);
StringValue(name);
@ -202,18 +208,46 @@ static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp)
yaml_document_start_event_initialize(
&event,
(RARRAY_LEN(version) > 0) ? &version_directive : NULL,
head,
data->head,
tail,
imp ? 1 : 0
);
emit(emitter, &event);
if(head) xfree(head);
return self;
}
static VALUE start_document_ensure(VALUE d)
{
struct start_document_data * data = (struct start_document_data *)d;
xfree(data->head);
return Qnil;
}
/* call-seq: emitter.start_document(version, tags, implicit)
*
* Start a document emission with YAML +version+, +tags+, and an +implicit+
* start.
*
* See Psych::Handler#start_document
*/
static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp)
{
struct start_document_data data = {
.self = self,
.version = version,
.tags = tags,
.imp = imp,
.head = NULL,
};
return rb_ensure(start_document_try, (VALUE)&data, start_document_ensure, (VALUE)&data);
}
/* call-seq: emitter.end_document(implicit)
*
* End a document emission with an +implicit+ ending.