String#force_encoding: don't clear coderange if encoding is unchanged
Some code out there blindly calls `force_encoding` without checking what the original encoding was, which needlessly clears the coderange. If the String is big, this can be a rather costly mistake. For instance, the `rack-utf8_sanitizer` gem does this on request bodies.
This commit is contained in:
parent
0a7e620a36
commit
ea1b1ea1aa
18
string.c
18
string.c
@ -10843,7 +10843,23 @@ static VALUE
|
||||
rb_str_force_encoding(VALUE str, VALUE enc)
|
||||
{
|
||||
str_modifiable(str);
|
||||
rb_enc_associate(str, rb_to_encoding(enc));
|
||||
|
||||
rb_encoding *encoding = rb_to_encoding(enc);
|
||||
int idx = rb_enc_to_index(encoding);
|
||||
|
||||
// If the encoding is unchanged, we do nothing.
|
||||
if (ENCODING_GET(str) == idx) {
|
||||
return str;
|
||||
}
|
||||
|
||||
rb_enc_associate_index(str, idx);
|
||||
|
||||
// If the coderange was 7bit and the new encoding is ASCII-compatible
|
||||
// we can keep the coderange.
|
||||
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT && encoding && rb_enc_asciicompat(encoding)) {
|
||||
return str;
|
||||
}
|
||||
|
||||
ENC_CODERANGE_CLEAR(str);
|
||||
return str;
|
||||
}
|
||||
|
@ -585,7 +585,7 @@ class TestObjSpace < Test::Unit::TestCase
|
||||
|
||||
def test_dump_string_coderange
|
||||
assert_includes ObjectSpace.dump("TEST STRING"), '"coderange":"7bit"'
|
||||
unknown = "TEST STRING".dup.force_encoding(Encoding::BINARY)
|
||||
unknown = "TEST STRING".dup.force_encoding(Encoding::UTF_16BE)
|
||||
2.times do # ensure that dumping the string doesn't mutate it
|
||||
assert_includes ObjectSpace.dump(unknown), '"coderange":"unknown"'
|
||||
end
|
||||
|
@ -2747,6 +2747,7 @@ EOS
|
||||
require 'objspace'
|
||||
begin;
|
||||
obj = "a" * 12
|
||||
obj.force_encoding(Encoding::UTF_16LE)
|
||||
obj.force_encoding(Encoding::BINARY)
|
||||
assert_include(ObjectSpace.dump(obj), '"coderange":"unknown"')
|
||||
Process.warmup
|
||||
|
Loading…
x
Reference in New Issue
Block a user