From d54f66d1b4ce32d78b526b1ea9e3f213a763d07c Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Thu, 8 Jun 2023 21:12:14 +0100 Subject: [PATCH] Assign into optimal size pools using String#split("") When String#split is used with an empty string as the field separator, it effectively splits the original string into chars, and there is a pre-existing fast path for this using SPLIT_TYPE_CHARS. However, this path creates an empty array in the smallest size pool and grows from there, despite already knowing the size of the desired array. This commit pre-allocates the correct size array in this case in order to allow the arrays to be embedded and avoid being allocated in the transient heap. --- string.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/string.c b/string.c index a7d05693f1..ad502a1920 100644 --- a/string.c +++ b/string.c @@ -8662,7 +8662,6 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) #define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count)) - if (result) result = rb_ary_new(); beg = 0; char *ptr = RSTRING_PTR(str); char *eptr = RSTRING_END(str); @@ -8671,6 +8670,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) int skip = 1; unsigned int c; + if (result) result = rb_ary_new(); end = beg; if (is_ascii_string(str)) { while (ptr < eptr) { @@ -8730,6 +8730,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) char *sptr = RSTRING_PTR(spat); long slen = RSTRING_LEN(spat); + if (result) result = rb_ary_new(); mustnot_broken(str); enc = rb_enc_check(str, spat); while (ptr < eptr && @@ -8751,6 +8752,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) char *str_start = ptr; int n; + if (result) result = rb_ary_new_capa(RSTRING_LEN(str)); mustnot_broken(str); enc = rb_enc_get(str); while (ptr < eptr && @@ -8762,6 +8764,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) beg = ptr - str_start; } else { + if (result) result = rb_ary_new(); long len = RSTRING_LEN(str); long start = beg; long 
idx;