String#force_encoding: don't clear the coderange if the encoding is unchanged

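Some code out there blindly calls `force_encoding` without checking what the original encoding was, which needlessly clears the coderange even when the encoding does not actually change. If the String is big, this can be a rather costly mistake, because the coderange then has to be recomputed by scanning the whole string again the next time it is needed. For instance, the `rack-utf8_sanitizer` gem does this on request bodies.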
2023-11-08 14:05:52 +01:00 · 2023-11-08 14:05:52 +01:00 · ea1b1ea1aa
commit ea1b1ea1aa
parent 0a7e620a36
3 changed files with 19 additions and 2 deletions
--- a/string.c
+++ b/string.c
@ -10843,7 +10843,23 @@ static VALUE
 rb_str_force_encoding(VALUE str, VALUE enc)
 {
    str_modifiable(str);
-    rb_enc_associate(str, rb_to_encoding(enc));
+
+    rb_encoding *encoding = rb_to_encoding(enc);
+    int idx = rb_enc_to_index(encoding);
+
+    // If the encoding is unchanged, we do nothing.
+    if (ENCODING_GET(str) == idx) {
+        return str;
+    }
+
+    rb_enc_associate_index(str, idx);
+
+    // If the coderange was 7bit and the new encoding is ASCII-compatible
+    // we can keep the coderange.
+    if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT && encoding && rb_enc_asciicompat(encoding)) {
+        return str;
+    }
+
    ENC_CODERANGE_CLEAR(str);
    return str;
 }
--- a/test/objspace/test_objspace.rb
+++ b/test/objspace/test_objspace.rb
@ -585,7 +585,7 @@ class TestObjSpace < Test::Unit::TestCase

  def test_dump_string_coderange
    assert_includes ObjectSpace.dump("TEST STRING"), '"coderange":"7bit"'
-    unknown = "TEST STRING".dup.force_encoding(Encoding::BINARY)
+    unknown = "TEST STRING".dup.force_encoding(Encoding::UTF_16BE)
    2.times do # ensure that dumping the string doesn't mutate it
      assert_includes ObjectSpace.dump(unknown), '"coderange":"unknown"'
    end
--- a/test/ruby/test_process.rb
+++ b/test/ruby/test_process.rb
@ -2747,6 +2747,7 @@ EOS
    require 'objspace'
    begin;
      obj = "a" * 12
+      obj.force_encoding(Encoding::UTF_16LE)
      obj.force_encoding(Encoding::BINARY)
      assert_include(ObjectSpace.dump(obj), '"coderange":"unknown"')
      Process.warmup