From 81fc1cf2017840d0786cf997b4d2a80f80060001 Mon Sep 17 00:00:00 2001 From: akr Date: Fri, 22 Feb 2008 06:52:54 +0000 Subject: [PATCH] * encoding.c (rb_enc_mbclen): return minlen instead of 1 when a character is not found properly. * string.c (rb_enc_strlen): round up string length with fixed multibyte encoding such as UTF-32. (rb_enc_strlen_cr): ditto. (rb_str_substr): fix substring with fixed multibyte encoding. (rb_str_justify): check number of characters. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15573 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 11 +++++++++++ encoding.c | 6 ++++-- string.c | 13 ++++++------- test/ruby/test_utf32.rb | 27 +++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 test/ruby/test_utf32.rb diff --git a/ChangeLog b/ChangeLog index 2d19f72903..82b6fd960a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Fri Feb 22 15:47:36 2008 Tanaka Akira + + * encoding.c (rb_enc_mbclen): return minlen instead of 1 when + a character is not found properly. + + * string.c (rb_enc_strlen): round up string length with fixed + multibyte encoding such as UTF-32. + (rb_enc_strlen_cr): ditto. + (rb_str_substr): fix substring with fixed multibyte encoding. + (rb_str_justify): check number of characters. + Fri Feb 22 12:11:12 2008 NARUSE, Yui * string.c (rb_str_inspect): string of ascii incompatible encoding diff --git a/encoding.c b/encoding.c index 002c0c04fb..813b4b5062 100644 --- a/encoding.c +++ b/encoding.c @@ -738,8 +738,10 @@ rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc) int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p) return MBCLEN_CHARFOUND_LEN(n); - else - return 1; + else { + int min = rb_enc_mbminlen(enc); + return min <= e-p ? min : e-p; + } } int diff --git a/string.c b/string.c index 19e8072462..90ed51b108 100644 --- a/string.c +++ b/string.c @@ -618,7 +618,7 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc) const char *q; if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { - return (e - p) / rb_enc_mbminlen(enc); + return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc); } else if (rb_enc_asciicompat(enc)) { c = 0; @@ -651,7 +651,7 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr) *cr = 0; if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { - return (e - p) / rb_enc_mbminlen(enc); + return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc); } else if (rb_enc_asciicompat(enc)) { c = 0; @@ -1223,10 +1223,9 @@ rb_str_substr(VALUE str, long beg, long len) len = 0; } else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { - long rest = (e - p) / rb_enc_mbmaxlen(enc); - if (len > rest) - len = rest; - else + if (len * rb_enc_mbmaxlen(enc) > e - p) + len = e - p; + else len *= rb_enc_mbmaxlen(enc); } else { @@ -5777,7 +5776,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag) flen = RSTRING_LEN(pad); fclen = str_strlen(pad, enc); singlebyte = single_byte_optimizable(pad); - if (flen == 0) { + if (flen == 0 || fclen == 0) { rb_raise(rb_eArgError, "zero width padding"); } } diff --git a/test/ruby/test_utf32.rb b/test/ruby/test_utf32.rb new file mode 100644 index 0000000000..f81524f29e --- /dev/null +++ b/test/ruby/test_utf32.rb @@ -0,0 +1,27 @@ +require 'test/unit' + +class TestUTF32 < Test::Unit::TestCase + def encdump(str) + d = str.dump + if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d + d + else + "#{d}.force_encoding(#{str.encoding.name.dump})" + end + end + + def assert_str_equal(expected, actual, message=nil) + full_message = build_message(message, <