From 254d12215c3223b5820c1356dc12cd38eaa4cc37 Mon Sep 17 00:00:00 2001 From: matz Date: Mon, 20 Apr 2009 15:04:18 +0000 Subject: [PATCH] * string.c (rb_str_split_m): faster processing on 7bit strings. * string.c (ascii_isspace): faster isspace() for 7bit strings. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@23234 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 ++++++ string.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4b5c52ca6d..3e95650053 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Mon Apr 20 20:29:04 2009 Yukihiro Matsumoto + + * string.c (rb_str_split_m): faster processing on 7bit strings. + + * string.c (ascii_isspace): faster isspace() for 7bit strings. + Sun Apr 19 14:43:18 2009 Nobuyoshi Nakada * eval.c (ruby_cleanup): the order of local variables on stack is diff --git a/string.c b/string.c index 7d96cdbc16..599b995795 100644 --- a/string.c +++ b/string.c @@ -5382,6 +5382,26 @@ rb_str_count(int argc, VALUE *argv, VALUE str) return INT2NUM(i); } +static const char isspacetable[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +#define ascii_isspace(c) isspacetable[(unsigned char)(c)] /* * call-seq: @@ -5495,21 +5515,45 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) unsigned int c; end = beg; - while (ptr < eptr) { - c = rb_enc_codepoint(ptr, eptr, enc); - ptr += rb_enc_mbclen(ptr, eptr, enc); - if (skip) { - if (rb_enc_isspace(c, enc)) { + if (is_ascii_string(str)) { + while (ptr < eptr) { + c = (unsigned char)*ptr++; + if (skip) { + if (ascii_isspace(c)) { + beg = ptr - bptr; + } + else { + end = ptr - bptr; + skip = 0; + if (!NIL_P(limit) && lim <= i) break; + } + } + else if (ascii_isspace(c)) { + rb_ary_push(result, rb_str_subseq(str, beg, end-beg)); + skip = 1; beg = ptr - bptr; + if (!NIL_P(limit)) ++i; } else { end = ptr - bptr; - skip = 0; - if (!NIL_P(limit) && lim <= i) break; } } - else { - if (rb_enc_isspace(c, enc)) { + } + else { + while (ptr < eptr) { + c = rb_enc_codepoint(ptr, eptr, enc); + ptr += rb_enc_mbclen(ptr, eptr, enc); + if (skip) { + if (rb_enc_isspace(c, enc)) { + beg = ptr - bptr; + } + else { + end = ptr - bptr; + skip = 0; + if (!NIL_P(limit) && lim <= i) break; + } + } + else if (rb_enc_isspace(c, enc)) { rb_ary_push(result, rb_str_subseq(str, beg, end-beg)); skip = 1; beg = ptr - bptr;