From 5b1bf8dd2d08ae7371ecf025967376bb794ed651 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 13 Aug 2019 23:23:43 +0900 Subject: [PATCH] UTF LE is determined by the first 2 bytes * io.c (io_strip_bom): if the first 2 bytes are 0xFF 0xFE, the input must be little-endian UTF, either UTF-16LE or UTF-32LE. [Bug #16099] --- io.c | 7 ++----- test/ruby/test_file.rb | 2 +- test/ruby/test_io_m17n.rb | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/io.c b/io.c index 69dbc93cff..6a4c5b978b 100644 --- a/io.c +++ b/io.c @@ -6136,12 +6136,9 @@ io_strip_bom(VALUE io) return ENCINDEX_UTF_32LE; } rb_io_ungetbyte(io, b4); - rb_io_ungetbyte(io, b3); - } - else { - rb_io_ungetbyte(io, b3); - return ENCINDEX_UTF_16LE; } + rb_io_ungetbyte(io, b3); + return ENCINDEX_UTF_16LE; } rb_io_ungetbyte(io, b2); break; diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb index 36c154d36c..3deab76e93 100644 --- a/test/ruby/test_file.rb +++ b/test/ruby/test_file.rb @@ -87,7 +87,7 @@ class TestFile < Test::Unit::TestCase end def test_bom_32le - assert_bom(["\xFF\xFE\0", "\0"], __method__) + assert_bom(["\xFF", "\xFE\0\0"], __method__) end def test_truncate_wbuf diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 630f2eec08..8101bfb62f 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -2084,8 +2084,8 @@ EOT define_method("test_strip_bom:#{name}") do path = "#{name}-bom.txt" with_tmpdir { - text = "\uFEFFa" - stripped = "a" + text = "\uFEFF\u0100a" + stripped = "\u0100a" content = text.encode(name) generate_file(path, content) result = File.read(path, mode: 'rb:BOM|UTF-8')