* io.c (io_fwrite): raise an error if an ASCII incompatible string is
  written for text mode IO without encoding conversion.
  (rb_io_extract_modeenc): binmode requirement changed.



git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19273 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
akr 2008-09-09 14:56:55 +00:00
parent 0bbda39758
commit dda64cab99
3 changed files with 197 additions and 31 deletions

View File

@ -1,3 +1,9 @@
Tue Sep 9 23:55:26 2008 Tanaka Akira <akr@fsij.org>
* io.c (io_fwrite): raise an error if ASCII incompatible string
written for text mode IO without encoding conversion.
(rb_io_extract_modeenc): binmode requirement changed.
Tue Sep 9 21:59:48 2008 Takeyuki Fujioka <xibbar@ruby-lang.org> Tue Sep 9 21:59:48 2008 Takeyuki Fujioka <xibbar@ruby-lang.org>
* lib/cgi*: split cgi.rb into four files. [ruby-dev:36041] * lib/cgi*: split cgi.rb into four files. [ruby-dev:36041]

9
io.c
View File

@ -756,6 +756,10 @@ io_fwrite(VALUE str, rb_io_t *fptr)
if (fptr->writeconv) { if (fptr->writeconv) {
if (!NIL_P(fptr->writeconv_stateless)) if (!NIL_P(fptr->writeconv_stateless))
common_encoding = fptr->writeconv_stateless; common_encoding = fptr->writeconv_stateless;
else if (!rb_enc_asciicompat(rb_enc_get(str))) {
rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s",
rb_enc_name(rb_enc_get(str)));
}
} }
else { else {
if (fptr->encs.enc2) if (fptr->encs.enc2)
@ -3908,7 +3912,10 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE opthash,
if ((fmode & FMODE_BINMODE) && (fmode & FMODE_TEXTMODE)) if ((fmode & FMODE_BINMODE) && (fmode & FMODE_TEXTMODE))
rb_raise(rb_eArgError, "both textmode and binmode specified"); rb_raise(rb_eArgError, "both textmode and binmode specified");
if (enc && !rb_enc_asciicompat(enc) && !(fmode & FMODE_BINMODE)) if ((fmode & FMODE_READABLE) &&
!enc2 &&
!(fmode & FMODE_BINMODE) &&
!rb_enc_asciicompat(enc ? enc : rb_default_external_encoding()))
rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode"); rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode");
*vmode_p = vmode; *vmode_p = vmode;

View File

@ -678,6 +678,7 @@ EOT
def test_getc_invalid3 def test_getc_invalid3
with_pipe("utf-16le:euc-jp") {|r, w| with_pipe("utf-16le:euc-jp") {|r, w|
w.binmode
before1 = "\x42\x30".force_encoding("utf-16le") before1 = "\x42\x30".force_encoding("utf-16le")
before2 = "\x44\x30".force_encoding("utf-16le") before2 = "\x44\x30".force_encoding("utf-16le")
invalid = "\x00\xd8".force_encoding("utf-16le") invalid = "\x00\xd8".force_encoding("utf-16le")
@ -1226,34 +1227,31 @@ EOT
} }
end end
def test_textmode_read_ascii_incompat_internal def test_read_newline_conversion_with_encoding_conversion
with_tmpdir { with_tmpdir {
# ascii incompatible internal encoding needs binmode. generate_file("t.utf8.crlf", "a\r\nb\r\n")
assert_raise(ArgumentError) { open("t.utf8.crlf", "rb:utf-8:utf-16be") {|f|
open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f| } content = f.read
assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content)
} }
assert_raise(ArgumentError) { open("t.utf8.crlf", "rt:utf-8:utf-16be") {|f|
open("t.utf8.crlf", "r:utf-8:utf-16be") {|f| } content = f.read
assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), content)
} }
assert_raise(ArgumentError) { open("t.utf8.crlf", "r:utf-8:utf-16be") {|f|
open("t.utf16.crlf", "rt:utf-16be") {|f| } content = f.read
} if system_newline == "\n"
assert_raise(ArgumentError) { assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), content)
open("t.utf16.crlf", "r:utf-16be") {|f| } else
assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), content)
end
} }
} }
end end
def test_binmode_read_ascii_incompat_internal def test_read_newline_conversion_without_encoding_conversion
with_tmpdir { with_tmpdir {
generate_file("t.utf8.crlf", "a\r\nb\r\n")
generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n") generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n")
# ascii incompatible internal encoding needs binmode.
open("t.utf8.crlf", "rb:utf-8:utf-16be") {|f|
content = f.read
assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"),
content)
}
open("t.utf16.crlf", "rb:utf-16be") {|f| open("t.utf16.crlf", "rb:utf-16be") {|f|
content = f.read content = f.read
assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"),
@ -1262,27 +1260,182 @@ EOT
} }
end end
def test_textmode_write_ascii_incompat_internal def test_read_newline_conversion_error
with_tmpdir { with_tmpdir {
# ascii incompatible internal encoding needs binmode. generate_file("empty.txt", "")
# ascii incompatible encoding without conversion needs binmode.
assert_raise(ArgumentError) { assert_raise(ArgumentError) {
open("t.utf8", "wt:utf-8:utf-16be") {|f| } open("empty.txt", "rt:utf-16be") {|f| }
} }
assert_raise(ArgumentError) { assert_raise(ArgumentError) {
open("t.utf8", "w:utf-8:utf-16be") {|f| } open("empty.txt", "r:utf-16be") {|f| }
} }
assert_raise(ArgumentError) { }
open("t.utf8", "w:utf-8:utf-16be") {|f| } end
# Opens fixture files under many mode strings ("rt"/"rb" combined with an
# external encoding and, optionally, an internal encoding) and compares the
# result of f.read with the expected string for each combination.
def test_read_mode
with_tmpdir {
# Fixture "t" mixes a lone CR, a CRLF and a lone LF, followed by the two
# bytes "\xc2\xa2". The other fixtures appear to hold the same text
# re-encoded: "ie"/"iu" with ISO-2022-JP escape sequences and "be"/"bu" as
# UTF-16BE, with the final character taken from the EUC-JP reading
# ("ie", "be") or the UTF-8 reading ("iu", "bu") of "\xc2\xa2" -- see the
# table below.
generate_file("t", "a\rb\r\nc\n\xc2\xa2")
generate_file("ie", "a\rb\r\nc\n\e$B\x42\x22\e(B")
generate_file("iu", "a\rb\r\nc\n\e$B\x21\x71\e(B")
generate_file("be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35")
generate_file("bu", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2")
# "\xc2\xa2" is valid as EUC-JP and UTF-8
# EUC-JP UTF-8 Unicode
# 0xC2A2 0xE894B5 U+8535
# 0xA1F1 0xC2A2 U+00A2
# No conversion, default / euc-jp / utf-8 external encoding: "rt" expects
# CR and CRLF to be read back as LF, "rb" expects the bytes unchanged.
open("t","rt") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding(Encoding.default_external), f.read) }
open("t","rb") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding(Encoding.default_external), f.read) }
open("t","rt:euc-jp") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
open("t","rb:euc-jp") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
open("t","rt:utf-8") {|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
open("t","rb:utf-8") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
# iso-2022-jp without conversion: opening in text mode is expected to raise
# ArgumentError, while binary mode is expected to read the bytes as they are.
assert_raise(ArgumentError) { open("t", "rt:iso-2022-jp") {|f| } }
open("t","rb:iso-2022-jp") {|f| assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("ISO-2022-JP"), f.read) }
# Conversion between euc-jp and utf-8, in text and binary mode.
open("t","rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n\u8535", f.read) }
open("t","rt:utf-8:euc-jp") {|f| assert_equal("a\nb\nc\n\xa1\xf1".force_encoding("EUC-JP"), f.read) }
open("t","rb:euc-jp:utf-8") {|f| assert_equal("a\rb\r\nc\n\u8535", f.read) }
open("t","rb:utf-8:euc-jp") {|f| assert_equal("a\rb\r\nc\n\xa1\xf1".force_encoding("EUC-JP"), f.read) }
# Conversion from euc-jp / utf-8 into iso-2022-jp or utf-16be as the
# internal encoding.
open("t","rt:euc-jp:iso-2022-jp"){|f| assert_equal("a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"), f.read) }
open("t","rt:utf-8:iso-2022-jp"){|f| assert_equal("a\nb\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"), f.read) }
open("t","rt:euc-jp:utf-16be"){|f| assert_equal("\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"), f.read) }
open("t","rt:utf-8:utf-16be"){|f| assert_equal("\0a\0\n\0b\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"), f.read) }
open("t","rb:euc-jp:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)}
open("t","rb:utf-8:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"),f.read)}
open("t","rb:euc-jp:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)}
open("t","rb:utf-8:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"),f.read)}
# Conversion from iso-2022-jp / utf-16be as the external encoding into
# euc-jp or utf-8.
open("ie","rt:iso-2022-jp:euc-jp"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
open("iu","rt:iso-2022-jp:utf-8"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
open("be","rt:utf-16be:euc-jp"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"), f.read) }
open("bu","rt:utf-16be:utf-8"){|f| assert_equal("a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"), f.read) }
open("ie","rb:iso-2022-jp:euc-jp"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"),f.read)}
open("iu","rb:iso-2022-jp:utf-8"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"),f.read)}
open("be","rb:utf-16be:euc-jp"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"),f.read)}
open("bu","rb:utf-16be:utf-8"){|f|assert_equal("a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"),f.read)}
# Conversion between iso-2022-jp and utf-16be in both directions.
open("ie","rt:iso-2022-jp:utf-16be"){|f|assert_equal("\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)}
open("be","rt:utf-16be:iso-2022-jp"){|f|assert_equal("a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)}
open("ie","rb:iso-2022-jp:utf-16be"){|f|assert_equal("\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"),f.read)}
open("be","rb:utf-16be:iso-2022-jp"){|f|assert_equal("a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"),f.read)}
}
end
def assert_write(expected, mode, *args)
with_tmpdir {
open("t", mode) {|f|
args.each {|arg| f.print arg }
} }
assert_raise(ArgumentError) { content = File.read("t", :mode=>"rb:ascii-8bit")
open("t.utf16", "wt:utf-16be") {|f| } assert_equal(expected.dup.force_encoding("ascii-8bit"),
} content.force_encoding("ascii-8bit"))
assert_raise(ArgumentError) { }
open("t.utf16", "w:utf-16be") {|f| } end
# Checks what ends up on disk when strings in several encodings are printed
# to a file opened as "wt" or "wb" with no encoding in the mode string, then
# runs the encoding-specific cases in t_write_mode_enc twice: once with no
# suffix and once with ":utf-8" appended to every mode string.
#
# Relies on the assert_write and system_newline helpers defined elsewhere in
# this file.
def test_write_mode
  # "\xc2\xa2" is valid as EUC-JP and UTF-8
  #   EUC-JP   UTF-8      Unicode
  #   0xC2A2   0xE894B5   U+8535
  #   0xA1F1   0xC2A2     U+00A2
  a = "a\rb\r\nc\n"
  e = "\xc2\xa2".force_encoding("euc-jp")
  u8 = "\xc2\xa2".force_encoding("utf-8")
  u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
  i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
  n = system_newline

  # Text mode: only the "\n" characters are expected to become the system
  # newline; the lone "\r" stays as it is.
  assert_write("a\rb\r#{n}c#{n}", "wt", a)
  assert_write("\xc2\xa2", "wt", e)
  assert_write("\xc2\xa2", "wt", u8)

  # Binary mode: bytes are expected to be written unchanged.
  assert_write("a\rb\r\nc\n", "wb", a)
  assert_write("\xc2\xa2", "wb", e)
  assert_write("\xc2\xa2", "wb", u8)

  # The two text-mode cases below are kept disabled, as in the original.
  #assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wt", u16) should raise
  #assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wt", i) should raise
  assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb", u16)
  assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb", i)

  t_write_mode_enc
  t_write_mode_enc(":utf-8")
end
# Shared body for test_write_mode: prints strings in several encodings to
# files opened with an explicit external encoding and checks the bytes that
# were written (through the assert_write helper defined elsewhere in this
# file).
#
# enc:: text appended verbatim to every mode string; "" by default, and
#       test_write_mode also calls this with ":utf-8".
def t_write_mode_enc(enc="")
  # The byte pair "\xc2\xa2" is a valid character in both EUC-JP and UTF-8:
  #   EUC-JP   UTF-8      Unicode
  #   0xC2A2   0xE894B5   U+8535
  #   0xA1F1   0xC2A2     U+00A2
  ascii = "a\rb\r\nc\n"
  eucjp = "\xc2\xa2".force_encoding("euc-jp")
  utf8 = "\xc2\xa2".force_encoding("utf-8")
  utf16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
  iso2022 = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
  nl = system_newline
  # The system newline as UTF-16BE bytes, for text-mode utf-16be output.
  nl16 = nl.encode("utf-16be").force_encoding("ascii-8bit")

  # Each row is [expected bytes on disk, mode prefix, string to print].
  # Rows are checked in the order listed.
  cases = [
    # euc-jp output
    ["a\rb\r#{nl}c#{nl}", "wt:euc-jp", ascii],
    ["\xc2\xa2", "wt:euc-jp", eucjp],
    ["\xa1\xf1", "wt:euc-jp", utf8],
    ["a\rb\r\nc\n", "wb:euc-jp", ascii],
    ["\xc2\xa2", "wb:euc-jp", eucjp],
    ["\xa1\xf1", "wb:euc-jp", utf8],
    ["\xc2\xa2\r\xa1\xf1\r#{nl}#{nl}", "wt:euc-jp", utf16],
    ["\xc2\xa2\r\xa1\xf1\r#{nl}#{nl}", "wt:euc-jp", iso2022],
    ["\xc2\xa2\r\xa1\xf1\r\n\n", "wb:euc-jp", utf16],
    ["\xc2\xa2\r\xa1\xf1\r\n\n", "wb:euc-jp", iso2022],
    # text mode, utf-16be and iso-2022-jp output
    ["\0a\0\r\0b\0\r#{nl16}\0c#{nl16}", "wt:utf-16be", ascii],
    ["\x85\x35", "wt:utf-16be", eucjp],
    ["\x00\xa2", "wt:utf-16be", utf8],
    ["a\rb\r#{nl}c#{nl}", "wt:iso-2022-jp", ascii],
    ["\e$B\x42\x22\e(B", "wt:iso-2022-jp", eucjp],
    ["\e$B\x21\x71\e(B", "wt:iso-2022-jp", utf8],
    # binary mode, utf-16be and iso-2022-jp output
    ["\0a\0\r\0b\0\r\0\n\0c\0\n", "wb:utf-16be", ascii],
    ["\x85\x35", "wb:utf-16be", eucjp],
    ["\x00\xa2", "wb:utf-16be", utf8],
    ["a\rb\r\nc\n", "wb:iso-2022-jp", ascii],
    ["\e$B\x42\x22\e(B", "wb:iso-2022-jp", eucjp],
    ["\e$B\x21\x71\e(B", "wb:iso-2022-jp", utf8],
    # utf-16be / iso-2022-jp input written as utf-16be
    ["\x85\x35\0\r\x00\xa2\0\r#{nl16}#{nl16}", "wt:utf-16be", utf16],
    ["\x85\x35\0\r\x00\xa2\0\r#{nl16}#{nl16}", "wt:utf-16be", iso2022],
    ["\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb:utf-16be", utf16],
    ["\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb:utf-16be", iso2022],
    # utf-16be / iso-2022-jp input written as iso-2022-jp
    ["\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{nl}#{nl}", "wt:iso-2022-jp", utf16],
    ["\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{nl}#{nl}", "wt:iso-2022-jp", iso2022],
    ["\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb:iso-2022-jp", utf16],
    ["\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb:iso-2022-jp", iso2022],
  ]

  cases.each {|expected, mode, written|
    assert_write(expected, "#{mode}#{enc}", written)
  }
end
def test_write_mode_fail
return if system_newline == "\n"
with_tmpdir {
open("t", "wt") {|f|
assert_raise(ArgumentError) { f.print "\0\r\0\r\0\n\0\n".force_encoding("utf-16be") }
} }
} }
end end
# Opens files for writing with utf-16be as the internal encoding
# ("utf-8:utf-16be") and as the only encoding ("utf-16be"), in binary ("wb"),
# text ("wt") and unspecified ("w") mode. There are no explicit assertions:
# the test passes as long as none of the opens raises.
def test_write_ascii_incompat
  with_tmpdir {
    targets = [["t.utf8", "utf-8:utf-16be"], ["t.utf16", "utf-16be"]]
    targets.each {|path, encodings|
      %w[wb wt w].each {|flags|
        open(path, "#{flags}:#{encodings}") {|f| }
      }
    }
  }
end
def test_binmode_write_ascii_incompat_internal def test_binmode_write_ascii_incompat_internal
with_tmpdir { with_tmpdir {
open("t.utf8.lf", "wb:utf-8:utf-16be") {|f| open("t.utf8.lf", "wb:utf-8:utf-16be") {|f|