From a7322e3678d8c5be0e528a346fa42444325e5349 Mon Sep 17 00:00:00 2001 From: kou Date: Sun, 28 Oct 2012 12:42:37 +0000 Subject: [PATCH] * lib/rexml/parsers/baseparser.rb: Fix a bug that UTF-8 is used for UTF-16XX encoded XML that doesn't have encoding="UTF-16" in XML declaration. * test/rexml/test_document.rb: Add tests for the above change. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37363 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 7 +++++++ lib/rexml/parsers/baseparser.rb | 3 +++ test/rexml/test_document.rb | 22 ++++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/ChangeLog b/ChangeLog index e48e9b7be3..5e6e4687ac 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Sun Oct 28 21:40:13 2012 Kouhei Sutou + + * lib/rexml/parsers/baseparser.rb: Fix a bug that UTF-8 is used + for UTF-16XX encoded XML that doesn't have encoding="UTF-16" in + XML declaration. + * test/rexml/test_document.rb: Add tests for the above change. + Sun Oct 28 21:37:34 2012 Kouhei Sutou * test/rexml/test_document.rb: Group tests that they parse diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index dc4a1c8bee..a88896c5db 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -215,6 +215,9 @@ module REXML if need_source_encoding_update?(encoding) @source.encoding = encoding end + if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding + encoding = "UTF-16" + end standalone = STANDALONE.match(results) standalone = standalone[1] unless standalone.nil? return [ :xmldecl, version, encoding, standalone ] diff --git a/test/rexml/test_document.rb b/test/rexml/test_document.rb index 028fa988a6..4c5d7d1dd8 100644 --- a/test/rexml/test_document.rb +++ b/test/rexml/test_document.rb @@ -246,5 +246,27 @@ EOX assert_equal("UTF-16", document.encoding) end end + + class NoEncodingTest < self + def test_utf_16le + xml = <<-EOX.encode("UTF-16LE").force_encoding("ASCII-8BIT") + +Hello world! 
+EOX + bom = "\ufeff".encode("UTF-16LE").force_encoding("ASCII-8BIT") + document = REXML::Document.new(bom + xml) + assert_equal("UTF-16", document.encoding) + end + + def test_utf_16be + xml = <<-EOX.encode("UTF-16BE").force_encoding("ASCII-8BIT") + +Hello world! +EOX + bom = "\ufeff".encode("UTF-16BE").force_encoding("ASCII-8BIT") + document = REXML::Document.new(bom + xml) + assert_equal("UTF-16", document.encoding) + end + end end end