Make IO#set_encoding with binary external encoding use nil internal encoding

This was already the behavior when a single `'external:internal'`
encoding specifier string was passed.  This makes the behavior
consistent for the case where separate external and internal
encoding specifiers are provided.

While here, fix the IO#set_encoding method documentation to
state that either the first or second argument can be a string
with an encoding name, and describe the behavior when the
external encoding is binary.

Fixes [Bug #18899]
This commit is contained in:
Jeremy Evans 2022-08-23 12:22:24 -07:00
parent a4e9606da4
commit 0903a25179
Notes: git 2023-01-01 17:00:45 +00:00
3 changed files with 103 additions and 8 deletions

15
io.c
View File

@ -11573,6 +11573,11 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
enc2 = NULL; enc2 = NULL;
} }
} }
if (enc2 == rb_ascii8bit_encoding()) {
/* If external is ASCII-8BIT, no transcoding */
enc = enc2;
enc2 = NULL;
}
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
} }
@ -13393,10 +13398,12 @@ rb_io_internal_encoding(VALUE io)
* *
* See {Encodings}[rdoc-ref:File@Encodings]. * See {Encodings}[rdoc-ref:File@Encodings].
* *
* Argument +ext_enc+, if given, must be an Encoding object; * Argument +ext_enc+, if given, must be an Encoding object
* or a String with the encoding name;
* it is assigned as the encoding for the stream. * it is assigned as the encoding for the stream.
* *
* Argument +int_enc+, if given, must be an Encoding object; * Argument +int_enc+, if given, must be an Encoding object
* or a String with the encoding name;
* it is assigned as the encoding for the internal string. * it is assigned as the encoding for the internal string.
* *
* Argument <tt>'ext_enc:int_enc'</tt>, if given, is a string * Argument <tt>'ext_enc:int_enc'</tt>, if given, is a string
@ -13404,6 +13411,10 @@ rb_io_internal_encoding(VALUE io)
* corresponding Encoding objects are assigned as the external * corresponding Encoding objects are assigned as the external
* and internal encodings for the stream. * and internal encodings for the stream.
* *
* If the external encoding of the stream is binary/ASCII-8BIT,
* the internal encoding is set to nil, since no
* transcoding is needed.
*
* Optional keyword arguments +enc_opts+ specify * Optional keyword arguments +enc_opts+ specify
* {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. * {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options].
* *

View File

@ -301,6 +301,7 @@ describe "IO#gets" do
@io.gets.encoding.should == Encoding::BINARY @io.gets.encoding.should == Encoding::BINARY
end end
ruby_version_is ''...'3.3' do
it "transcodes to internal encoding if the IO object's external encoding is BINARY" do it "transcodes to internal encoding if the IO object's external encoding is BINARY" do
Encoding.default_external = Encoding::BINARY Encoding.default_external = Encoding::BINARY
Encoding.default_internal = Encoding::UTF_8 Encoding.default_internal = Encoding::UTF_8
@ -309,3 +310,14 @@ describe "IO#gets" do
@io.gets.encoding.should == Encoding::UTF_8 @io.gets.encoding.should == Encoding::UTF_8
end end
end end
# Version-guarded counterpart of the "transcodes to internal encoding" example:
# with a single version string the mspec guard runs the block on that Ruby
# version and later.
ruby_version_is '3.3' do
it "ignores the internal encoding if the IO object's external encoding is BINARY" do
# Process-wide defaults: BINARY external, UTF-8 internal.
Encoding.default_external = Encoding::BINARY
Encoding.default_internal = Encoding::UTF_8
@io = new_io @name, 'r'
# Explicitly request BINARY external with a UTF-8 internal encoding,
# passed as two separate arguments.
@io.set_encoding Encoding::BINARY, Encoding::UTF_8
# The line read back must stay BINARY, i.e. the requested internal
# encoding is not applied.
@io.gets.encoding.should == Encoding::BINARY
end
end
end

View File

@ -1158,6 +1158,78 @@ EOT
end end
end end
# Exercises IO#set_encoding with every combination of String names and
# Encoding objects, both as a single 'ext:int' specifier and as two
# separate arguments.
#
# Expectations:
# * a binary external encoding always reports ASCII-8BIT externally and,
#   when an internal encoding was also requested, a nil internal encoding;
# * an ISO-8859-1 external encoding with a UTF-8 internal encoding keeps
#   both encodings as given.
#
# Each case runs against a freshly opened File::NULL so that no state is
# carried over from one case to the next.
def test_set_encoding_argument_parsing
  binary = Encoding.find('binary')
  utf8 = Encoding.find('utf-8')
  latin1 = Encoding.find('iso-8859-1')

  # External encoding only: just the external encoding is asserted.
  [['binary'], [binary]].each do |args|
    File.open(File::NULL) do |io|
      io.set_encoding(*args)
      assert_equal(Encoding::ASCII_8BIT, io.external_encoding)
    end
  end

  # [set_encoding arguments, expected internal, expected external]
  cases = [
    [['binary:utf-8'], nil, Encoding::ASCII_8BIT],
    [['binary', 'utf-8'], nil, Encoding::ASCII_8BIT],
    [[binary, utf8], nil, Encoding::ASCII_8BIT],
    [['binary', utf8], nil, Encoding::ASCII_8BIT],
    [[binary, 'utf-8'], nil, Encoding::ASCII_8BIT],
    [['iso-8859-1:utf-8'], Encoding::UTF_8, Encoding::ISO_8859_1],
    [['iso-8859-1', 'utf-8'], Encoding::UTF_8, Encoding::ISO_8859_1],
    [[latin1, utf8], Encoding::UTF_8, Encoding::ISO_8859_1],
    [['iso-8859-1', utf8], Encoding::UTF_8, Encoding::ISO_8859_1],
    [[latin1, 'utf-8'], Encoding::UTF_8, Encoding::ISO_8859_1],
  ]

  cases.each do |args, internal, external|
    File.open(File::NULL) do |io|
      io.set_encoding(*args)
      # Internal encoding is checked before external, as a pair per case.
      assert_equal(internal, io.internal_encoding)
      assert_equal(external, io.external_encoding)
    end
  end
end
def test_textmode_twice def test_textmode_twice
assert_raise(ArgumentError) { assert_raise(ArgumentError) {
open(__FILE__, "rt", textmode: true) {|f| open(__FILE__, "rt", textmode: true) {|f|