Don't use single byte optimization on grapheme clusters

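Unicode Text Segmentation (UAX #29) treats CRLF as a single grapheme cluster, so even a single-byte-optimizable string can contain a cluster that spans more than one character. [Bug #15337]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65954 b2dd03c8-39d4-4d8f-98ff-823fe69b080e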
2018-11-24 11:53:19 +00:00 · 2018-11-24 11:53:19 +00:00 · 7850586af4
commit 7850586af4
parent f33adbc11e
2 changed files with 3 additions and 2 deletions
--- a/string.c
+++ b/string.c
@@ -8459,7 +8459,7 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
    rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
    const char *ptr, *end;

-    if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+    if (!rb_enc_unicode_p(enc)) {
 	return rb_str_length(str);
    }

@@ -8487,7 +8487,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
    rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
    const char *ptr, *end;

-    if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+    if (!rb_enc_unicode_p(enc)) {
 	return rb_str_enumerate_chars(str, ary);
    }

--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -973,6 +973,7 @@ CODE

  def test_each_grapheme_cluster
    [
+      "\u{0D 0A}",
      "\u{20 200d}",
      "\u{600 600}",
      "\u{600 20}",