Fix memory leak in regexp grapheme clusters

[Bug #20161]

The cc->mbuf gets overwritten, so we need to free it to not leak memory.

For example:

    str = "hello world".encode(Encoding::UTF_32LE)

    10.times do
      1_000.times do
        str.grapheme_clusters
      end

      puts `ps -o rss= -p #{$$}`
    end

Before:

    15536
    15760
    15920
    16144
    16304
    16480
    16640
    16784
    17008
    17280

After:

    15584
    15584
    15760
    15824
    15888
    15888
    15888
    15888
    16048
    16112
This commit is contained in:
Peter Zhu 2024-01-08 10:09:13 -05:00
parent 0a30fc6211
commit 37ed86fd3c

View File

@ -6105,7 +6105,8 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */
R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */
R_ERR(not_code_range_buf(env->enc, cc->mbuf, &inverted_buf, env));
cc->mbuf = inverted_buf; /* TODO: check what to do with buffer before inversion */
bbuf_free(cc->mbuf);
cc->mbuf = inverted_buf;
env->warnings_flag &= dup_not_warned; /* TODO: fix false warning */
}