From e17c83e02c5019f7a8c31b31a567ab6de6d6c7f4 Mon Sep 17 00:00:00 2001
From: Peter Zhu
Date: Tue, 16 Jan 2024 09:32:21 -0500
Subject: [PATCH] Fix memory leak in String#tr and String#tr_s

rb_enc_codepoint_len could raise, which would cause the memory in buf to
leak.

For example:

    str1 = "\xE0\xA0\xA1#{" " * 100}".force_encoding("EUC-JP")
    str2 = ""
    str3 = "a".force_encoding("Windows-31J")

    10.times do
      1_000_000.times do
        str1.tr_s(str2, str3)
      rescue
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    17536
    22752
    28032
    33312
    38688
    43968
    49200
    54432
    59744
    64992

After:

    12176
    12352
    12352
    12448
    12448
    12448
    12448
    12448
    12448
    12448
---
 string.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/string.c b/string.c
index 5eda835f4b..4c0fe87635 100644
--- a/string.c
+++ b/string.c
@@ -7987,7 +7987,14 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
         while (s < send) {
             int may_modify = 0;
 
-            c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1);
+            int r = rb_enc_precise_mbclen((char *)s, (char *)send, e1);
+            if (!MBCLEN_CHARFOUND_P(r)) {
+                xfree(buf);
+                rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(e1));
+            }
+            clen = MBCLEN_CHARFOUND_LEN(r);
+            c0 = c = rb_enc_mbc_to_codepoint((char *)s, (char *)send, e1);
+
             tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
             s += clen;
@@ -8067,7 +8074,15 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
 
         while (s < send) {
             int may_modify = 0;
-            c0 = c = rb_enc_codepoint_len((char *)s, (char *)send, &clen, e1);
+
+            int r = rb_enc_precise_mbclen((char *)s, (char *)send, e1);
+            if (!MBCLEN_CHARFOUND_P(r)) {
+                xfree(buf);
+                rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(e1));
+            }
+            clen = MBCLEN_CHARFOUND_LEN(r);
+            c0 = c = rb_enc_mbc_to_codepoint((char *)s, (char *)send, e1);
+
             tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
 
             if (c < 256) {