Make String#rstrip{,!} raise Encoding::CompatibilityError for broken coderange

It's questionable whether we want to allow rstrip to work for strings
where the broken coderange occurs before the trailing whitespace and
not after, but this approach is probably simpler, and I don't think
users should expect string operations like rstrip to work on broken
strings.

In some cases, this changes rstrip to raise
Encoding::CompatibilityError instead of ArgumentError.  However, as
the problem is related to an encoding issue in the receiver, and due
not due to an issue with an argument, I think
Encoding::CompatibilityError is the more appropriate error.

Fixes [Bug #18931]
This commit is contained in:
Jeremy Evans 2022-08-24 10:31:17 -07:00
parent c6330cd32b
commit 571d21fd4a
Notes: git 2022-11-25 02:25:10 +00:00
4 changed files with 30 additions and 8 deletions

View File

@ -69,13 +69,27 @@ describe "String#rstrip!" do
-> { "".freeze.rstrip! }.should raise_error(FrozenError)
end
it "raises an ArgumentError if the last non-space codepoint is invalid" do
s = "abc\xDF".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(ArgumentError)
ruby_version_is "3.2" do
it "raises an Encoding::CompatibilityError if the last non-space codepoint is invalid" do
s = "abc\xDF".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(Encoding::CompatibilityError)
s = "abc\xDF ".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(ArgumentError)
s = "abc\xDF ".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(Encoding::CompatibilityError)
end
end
ruby_version_is ""..."3.2" do
it "raises an ArgumentError if the last non-space codepoint is invalid" do
s = "abc\xDF".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(ArgumentError)
s = "abc\xDF ".force_encoding(Encoding::UTF_8)
s.valid_encoding?.should be_false
-> { s.rstrip! }.should raise_error(ArgumentError)
end
end
end

View File

@ -9720,6 +9720,9 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc)
const char *t;
rb_str_check_dummy_enc(enc);
if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncCompatError, "invalid byte sequence in %s", rb_enc_name(enc));
}
if (!s || s >= e) return 0;
t = e;

View File

@ -28,7 +28,7 @@ class HTTPHeaderTest < Test::Unit::TestCase
assert_raise(NoMethodError){ @c.initialize_http_header("foo"=>[]) }
assert_raise(ArgumentError){ @c.initialize_http_header("foo"=>"a\nb") }
assert_raise(ArgumentError){ @c.initialize_http_header("foo"=>"a\rb") }
assert_raise(ArgumentError){ @c.initialize_http_header("foo"=>"a\xff") }
assert_raise(Encoding::CompatibilityError){ @c.initialize_http_header("foo"=>"a\xff") }
end
def test_initialize_with_symbol

View File

@ -2817,6 +2817,11 @@ CODE
assert_equal("\u3042", s5)
assert_raise(Encoding::CompatibilityError) { S("\u3042".encode("ISO-2022-JP")).rstrip! }
assert_raise(Encoding::CompatibilityError) { S("abc \x80 ".force_encoding('UTF-8')).rstrip! }
assert_raise(Encoding::CompatibilityError) { S("abc\x80 ".force_encoding('UTF-8')).rstrip! }
assert_raise(Encoding::CompatibilityError) { S("abc \x80".force_encoding('UTF-8')).rstrip! }
assert_raise(Encoding::CompatibilityError) { S("\x80".force_encoding('UTF-8')).rstrip! }
assert_raise(Encoding::CompatibilityError) { S(" \x80 ".force_encoding('UTF-8')).rstrip! }
end
def test_lstrip