From 593505ac6f802d2b5bff469425b7c76b65cc9b10 Mon Sep 17 00:00:00 2001 From: nobu Date: Tue, 5 Mar 2019 00:32:15 +0000 Subject: [PATCH] string.c: respect the actual encoding * string.c (rb_enc_str_coderange): respect the actual encoding if a BOM is present, and scan for the actual code range. [ruby-core:91662] [Bug #15635] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67167 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- string.c | 5 +++-- test/ruby/test_m17n.rb | 7 +++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/string.c b/string.c index 88686db76d..449fb558db 100644 --- a/string.c +++ b/string.c @@ -655,12 +655,13 @@ rb_enc_str_coderange(VALUE str) if (cr == ENC_CODERANGE_UNKNOWN) { int encidx = ENCODING_GET(str); rb_encoding *enc = rb_enc_from_index(encidx); - if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc)) { + if (rb_enc_mbminlen(enc) > 1 && rb_enc_dummy_p(enc) && + rb_enc_mbminlen(enc = get_actual_encoding(encidx, str)) == 1) { cr = ENC_CODERANGE_BROKEN; } else { cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), - get_actual_encoding(encidx, str)); + enc); } ENC_CODERANGE_SET(str, cr); } diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index c1184c551f..75daf61376 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -269,6 +269,13 @@ class TestM17N < Test::Unit::TestCase assert_empty(encs, bug10598) end + def test_utf_without_bom_valid + encs = [Encoding::UTF_16, Encoding::UTF_32].find_all {|enc| + !(+"abcd").encode!(enc).force_encoding(enc).valid_encoding? + } + assert_empty(encs) + end + def test_object_utf16_32_inspect EnvUtil.suppress_warning do begin