Don't use single byte optimization on grapheme clusters

Unicode Text Segmentation treats CRLF as a single grapheme cluster, so even single-byte-optimizable strings must go through full cluster scanning. [Bug #15337]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65954 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2018-11-24 11:53:19 +00:00
parent f33adbc11e
commit 7850586af4
2 changed files with 3 additions and 2 deletions

View File

@@ -8459,7 +8459,7 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
const char *ptr, *end;
if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
if (!rb_enc_unicode_p(enc)) {
return rb_str_length(str);
}
@@ -8487,7 +8487,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
const char *ptr, *end;
if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
if (!rb_enc_unicode_p(enc)) {
return rb_str_enumerate_chars(str, ary);
}

View File

@@ -973,6 +973,7 @@ CODE
def test_each_grapheme_cluster
[
"\u{0D 0A}",
"\u{20 200d}",
"\u{600 600}",
"\u{600 20}",