From 7850586af435f44ff97c93decc97995bbdf6bad4 Mon Sep 17 00:00:00 2001 From: naruse Date: Sat, 24 Nov 2018 11:53:19 +0000 Subject: [PATCH] Don't use single byte optimization on grapheme clusters Unicode Text Segmentation considers CRLF as a character. [Bug #15337] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65954 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- string.c | 4 ++-- test/ruby/test_string.rb | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/string.c b/string.c index d7ef9cc9bc..26d03d1835 100644 --- a/string.c +++ b/string.c @@ -8459,7 +8459,7 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj) rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); const char *ptr, *end; - if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + if (!rb_enc_unicode_p(enc)) { return rb_str_length(str); } @@ -8487,7 +8487,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary) rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); const char *ptr, *end; - if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + if (!rb_enc_unicode_p(enc)) { return rb_str_enumerate_chars(str, ary); } diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 1a6d87f11f..014eb5ec15 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -973,6 +973,7 @@ CODE def test_each_grapheme_cluster [ + "\u{0D 0A}", "\u{20 200d}", "\u{600 600}", "\u{600 20}",