Make IO#set_encoding with binary external encoding use nil internal encoding

This was already the behavior when a single `'external:internal'`
encoding specifier string was passed.  This makes the behavior
consistent for the case where separate external and internal
encoding specifiers are provided.

While here, fix the IO#set_encoding method documentation to
state that either the first or second argument can be a string
with an encoding name, and describe the behavior when the
external encoding is binary.

Fixes [Bug #18899]
This commit is contained in:
Jeremy Evans 2022-08-23 12:22:24 -07:00
parent a4e9606da4
commit 0903a25179
Notes: git 2023-01-01 17:00:45 +00:00
3 changed files with 103 additions and 8 deletions

15
io.c
View File

@ -11573,6 +11573,11 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
enc2 = NULL;
}
}
if (enc2 == rb_ascii8bit_encoding()) {
/* If external is ASCII-8BIT, no transcoding */
enc = enc2;
enc2 = NULL;
}
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
}
@ -13393,10 +13398,12 @@ rb_io_internal_encoding(VALUE io)
*
* See {Encodings}[rdoc-ref:File@Encodings].
*
* Argument +ext_enc+, if given, must be an Encoding object;
* Argument +ext_enc+, if given, must be an Encoding object
* or a String with the encoding name;
* it is assigned as the encoding for the stream.
*
* Argument +int_enc+, if given, must be an Encoding object;
* Argument +int_enc+, if given, must be an Encoding object
* or a String with the encoding name;
* it is assigned as the encoding for the internal string.
*
* Argument <tt>'ext_enc:int_enc'</tt>, if given, is a string
@ -13404,6 +13411,10 @@ rb_io_internal_encoding(VALUE io)
* corresponding Encoding objects are assigned as the external
* and internal encodings for the stream.
*
* If the external encoding of the stream is binary/ASCII-8BIT,
* the internal encoding of the stream is set to nil, since no
* transcoding is needed.
*
* Optional keyword arguments +enc_opts+ specify
* {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options].
*

View File

@ -301,11 +301,23 @@ describe "IO#gets" do
@io.gets.encoding.should == Encoding::BINARY
end
it "transcodes to internal encoding if the IO object's external encoding is BINARY" do
Encoding.default_external = Encoding::BINARY
Encoding.default_internal = Encoding::UTF_8
@io = new_io @name, 'r'
@io.set_encoding Encoding::BINARY, Encoding::UTF_8
@io.gets.encoding.should == Encoding::UTF_8
# Guarded to Ruby versions below 3.3: with a BINARY external encoding and an
# explicit UTF-8 internal encoding passed to set_encoding, the string returned
# by gets is expected to be tagged with the internal encoding (UTF-8).
ruby_version_is ''...'3.3' do
it "transcodes to internal encoding if the IO object's external encoding is BINARY" do
Encoding.default_external = Encoding::BINARY
Encoding.default_internal = Encoding::UTF_8
@io = new_io @name, 'r'
@io.set_encoding Encoding::BINARY, Encoding::UTF_8
@io.gets.encoding.should == Encoding::UTF_8
end
end
# Guarded to Ruby 3.3 and later: the same set_encoding call (BINARY external,
# UTF-8 internal) is expected to leave the string returned by gets tagged as
# BINARY, i.e. the requested internal encoding is not applied.
ruby_version_is '3.3' do
it "ignores the internal encoding if the IO object's external encoding is BINARY" do
Encoding.default_external = Encoding::BINARY
Encoding.default_internal = Encoding::UTF_8
@io = new_io @name, 'r'
@io.set_encoding Encoding::BINARY, Encoding::UTF_8
@io.gets.encoding.should == Encoding::BINARY
end
end
end

View File

@ -1158,6 +1158,78 @@ EOT
end
end
# Exercises every accepted spelling of the IO#set_encoding arguments
# (encoding names, Encoding objects, and the combined 'ext:int' string)
# and checks the encodings the stream reports afterwards.
def test_set_encoding_argument_parsing
  binary = Encoding.find('binary')
  latin1 = Encoding.find('iso-8859-1')
  utf8 = Encoding.find('utf-8')

  # External encoding only: given as a name, then as an Encoding object.
  [['binary'], [binary]].each do |args|
    File.open(File::NULL) do |io|
      io.set_encoding(*args)
      assert_equal(Encoding::ASCII_8BIT, io.external_encoding)
    end
  end

  # Each row: set_encoding arguments, expected internal, expected external.
  # Rows with a binary external encoding expect a nil internal encoding.
  cases = [
    [['binary:utf-8'], nil, Encoding::ASCII_8BIT],
    [['binary', 'utf-8'], nil, Encoding::ASCII_8BIT],
    [[binary, utf8], nil, Encoding::ASCII_8BIT],
    [['binary', utf8], nil, Encoding::ASCII_8BIT],
    [[binary, 'utf-8'], nil, Encoding::ASCII_8BIT],
    [['iso-8859-1:utf-8'], Encoding::UTF_8, Encoding::ISO_8859_1],
    [['iso-8859-1', 'utf-8'], Encoding::UTF_8, Encoding::ISO_8859_1],
    [[latin1, utf8], Encoding::UTF_8, Encoding::ISO_8859_1],
    [['iso-8859-1', utf8], Encoding::UTF_8, Encoding::ISO_8859_1],
    [[latin1, 'utf-8'], Encoding::UTF_8, Encoding::ISO_8859_1]
  ]

  cases.each do |args, internal, external|
    # A fresh handle per case so no encoding state carries over.
    File.open(File::NULL) do |io|
      io.set_encoding(*args)
      assert_equal(internal, io.internal_encoding)
      assert_equal(external, io.external_encoding)
    end
  end
end
def test_textmode_twice
assert_raise(ArgumentError) {
open(__FILE__, "rt", textmode: true) {|f|