String#force_encoding: don't clear coderange if encoding is unchanged
Some code out there blindly calls `force_encoding` without checking what the original encoding was, which needlessly clears the coderange. If the String is big, this can be a rather costly mistake. For instance, the `rack-utf8_sanitizer` gem does this on request bodies.
This commit is contained in:
parent
0a7e620a36
commit
ea1b1ea1aa
18
string.c
18
string.c
@ -10843,7 +10843,23 @@ static VALUE
|
|||||||
rb_str_force_encoding(VALUE str, VALUE enc)
|
rb_str_force_encoding(VALUE str, VALUE enc)
|
||||||
{
|
{
|
||||||
str_modifiable(str);
|
str_modifiable(str);
|
||||||
rb_enc_associate(str, rb_to_encoding(enc));
|
|
||||||
|
rb_encoding *encoding = rb_to_encoding(enc);
|
||||||
|
int idx = rb_enc_to_index(encoding);
|
||||||
|
|
||||||
|
// If the encoding is unchanged, we do nothing.
|
||||||
|
if (ENCODING_GET(str) == idx) {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
rb_enc_associate_index(str, idx);
|
||||||
|
|
||||||
|
// If the coderange was 7bit and the new encoding is ASCII-compatible
|
||||||
|
// we can keep the coderange.
|
||||||
|
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT && encoding && rb_enc_asciicompat(encoding)) {
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
ENC_CODERANGE_CLEAR(str);
|
ENC_CODERANGE_CLEAR(str);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
@ -585,7 +585,7 @@ class TestObjSpace < Test::Unit::TestCase
|
|||||||
|
|
||||||
def test_dump_string_coderange
|
def test_dump_string_coderange
|
||||||
assert_includes ObjectSpace.dump("TEST STRING"), '"coderange":"7bit"'
|
assert_includes ObjectSpace.dump("TEST STRING"), '"coderange":"7bit"'
|
||||||
unknown = "TEST STRING".dup.force_encoding(Encoding::BINARY)
|
unknown = "TEST STRING".dup.force_encoding(Encoding::UTF_16BE)
|
||||||
2.times do # ensure that dumping the string doesn't mutate it
|
2.times do # ensure that dumping the string doesn't mutate it
|
||||||
assert_includes ObjectSpace.dump(unknown), '"coderange":"unknown"'
|
assert_includes ObjectSpace.dump(unknown), '"coderange":"unknown"'
|
||||||
end
|
end
|
||||||
|
@ -2747,6 +2747,7 @@ EOS
|
|||||||
require 'objspace'
|
require 'objspace'
|
||||||
begin;
|
begin;
|
||||||
obj = "a" * 12
|
obj = "a" * 12
|
||||||
|
obj.force_encoding(Encoding::UTF_16LE)
|
||||||
obj.force_encoding(Encoding::BINARY)
|
obj.force_encoding(Encoding::BINARY)
|
||||||
assert_include(ObjectSpace.dump(obj), '"coderange":"unknown"')
|
assert_include(ObjectSpace.dump(obj), '"coderange":"unknown"')
|
||||||
Process.warmup
|
Process.warmup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user