* string.c (str_nth): direct jump if string is 7bit only. great performance boost for worst case.

* string.c (str_strlen): direct size if string is 7bit only.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14221 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
matz 2007-12-13 18:00:50 +00:00
parent e0334fb250
commit c958d9a9c6
2 changed files with 29 additions and 12 deletions

View File

@ -1,3 +1,10 @@
Fri Dec 14 02:55:41 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
* string.c (str_nth): direct jump if string is 7bit only. great
performance boost for worst case.
* string.c (str_strlen): direct size if string is 7bit only.
Fri Dec 14 02:29:32 2007 Yukihiro Matsumoto <matz@ruby-lang.org> Fri Dec 14 02:29:32 2007 Yukihiro Matsumoto <matz@ruby-lang.org>
* encoding.c (rb_enc_compatible): 1st argument (typically the * encoding.c (rb_enc_compatible): 1st argument (typically the

View File

@ -93,6 +93,7 @@ VALUE rb_cSymbol;
} while (0) } while (0)
#define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) #define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
#define IS_7BIT(str) (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
VALUE rb_fs; VALUE rb_fs;
@ -472,6 +473,7 @@ str_strlen(VALUE str, rb_encoding *enc)
{ {
long len; long len;
if (is_ascii_string(str)) return RSTRING_LEN(str);
if (!enc) enc = rb_enc_get(str); if (!enc) enc = rb_enc_get(str);
len = rb_enc_strlen(RSTRING_PTR(str), RSTRING_END(str), enc); len = rb_enc_strlen(RSTRING_PTR(str), RSTRING_END(str), enc);
if (len < 0) { if (len < 0) {
@ -750,8 +752,11 @@ rb_str_s_try_convert(VALUE dummy, VALUE str)
} }
static char* static char*
str_nth(const char *p, const char *e, int nth, rb_encoding *enc) str_nth(const char *p, const char *e, int nth, rb_encoding *enc, int asc)
{ {
if (asc)
p += nth;
else
p = rb_enc_nth(p, e, nth, enc); p = rb_enc_nth(p, e, nth, enc);
if (!p) { if (!p) {
rb_raise(rb_eArgError, "invalid mbstring sequence"); rb_raise(rb_eArgError, "invalid mbstring sequence");
@ -763,9 +768,9 @@ str_nth(const char *p, const char *e, int nth, rb_encoding *enc)
} }
static int static int
str_offset(const char *p, const char *e, int nth, rb_encoding *enc) str_offset(const char *p, const char *e, int nth, rb_encoding *enc, int asc)
{ {
const char *pp = str_nth(p, e, nth, enc); const char *pp = str_nth(p, e, nth, enc, asc);
return pp - p; return pp - p;
} }
@ -811,6 +816,7 @@ rb_str_substr(VALUE str, long beg, long len)
rb_encoding *enc = rb_enc_get(str); rb_encoding *enc = rb_enc_get(str);
VALUE str2; VALUE str2;
char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str); char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
int asc = IS_7BIT(str);
if (len < 0) return Qnil; if (len < 0) return Qnil;
if (!RSTRING_LEN(str)) { if (!RSTRING_LEN(str)) {
@ -839,7 +845,7 @@ rb_str_substr(VALUE str, long beg, long len)
if (len == 0) { if (len == 0) {
p = 0; p = 0;
} }
else if ((p = str_nth(s, e, beg, enc)) == e) { else if ((p = str_nth(s, e, beg, enc, asc)) == e) {
len = 0; len = 0;
} }
else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) { else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
@ -850,7 +856,7 @@ rb_str_substr(VALUE str, long beg, long len)
len *= rb_enc_mbmaxlen(enc); len *= rb_enc_mbmaxlen(enc);
} }
else { else {
len = str_offset(p, e, len, enc); len = str_offset(p, e, len, enc, asc);
} }
sub: sub:
str2 = rb_str_new5(str, p, len); str2 = rb_str_new5(str, p, len);
@ -1432,7 +1438,7 @@ rb_str_index(VALUE str, VALUE sub, long offset)
if (len - offset < slen) return -1; if (len - offset < slen) return -1;
s = RSTRING_PTR(str); s = RSTRING_PTR(str);
if (offset) { if (offset) {
s = str_nth(s, RSTRING_END(str), offset, enc); s = str_nth(s, RSTRING_END(str), offset, enc, IS_7BIT(str));
offset = s - RSTRING_PTR(str); offset = s - RSTRING_PTR(str);
} }
if (slen == 0) return offset; if (slen == 0) return offset;
@ -1530,6 +1536,7 @@ rb_str_rindex(VALUE str, VALUE sub, long pos)
long len, slen; long len, slen;
char *s, *sbeg, *e, *t; char *s, *sbeg, *e, *t;
rb_encoding *enc; rb_encoding *enc;
int asc = IS_7BIT(str);
enc = rb_enc_check(str, sub); enc = rb_enc_check(str, sub);
len = str_strlen(str, enc); len = str_strlen(str, enc);
@ -1546,7 +1553,7 @@ rb_str_rindex(VALUE str, VALUE sub, long pos)
e = RSTRING_END(str); e = RSTRING_END(str);
t = RSTRING_PTR(sub); t = RSTRING_PTR(sub);
for (;;) { for (;;) {
s = str_nth(sbeg, e, pos, enc); s = str_nth(sbeg, e, pos, enc, asc);
if (memcmp(s, t, slen) == 0) { if (memcmp(s, t, slen) == 0) {
return pos; return pos;
} }
@ -2087,6 +2094,7 @@ rb_str_splice(VALUE str, long beg, long len, VALUE val)
long slen; long slen;
char *p, *e; char *p, *e;
rb_encoding *enc; rb_encoding *enc;
int asc = IS_7BIT(str);
if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len); if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
@ -2108,8 +2116,8 @@ rb_str_splice(VALUE str, long beg, long len, VALUE val)
if (slen < len || slen < beg + len) { if (slen < len || slen < beg + len) {
len = slen - beg; len = slen - beg;
} }
p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc); p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, asc);
e = str_nth(p, RSTRING_END(str), len, enc); e = str_nth(p, RSTRING_END(str), len, enc, asc);
/* error check */ /* error check */
beg = p - RSTRING_PTR(str); /* physical position */ beg = p - RSTRING_PTR(str); /* physical position */
len = e - p; /* physical length */ len = e - p; /* physical length */
@ -4988,6 +4996,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
char *p, *f = " "; char *p, *f = " ";
long n, llen, rlen; long n, llen, rlen;
volatile VALUE pad; volatile VALUE pad;
int asc = 1;
rb_scan_args(argc, argv, "11", &w, &pad); rb_scan_args(argc, argv, "11", &w, &pad);
enc = rb_enc_get(str); enc = rb_enc_get(str);
@ -4998,6 +5007,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
f = RSTRING_PTR(pad); f = RSTRING_PTR(pad);
flen = RSTRING_LEN(pad); flen = RSTRING_LEN(pad);
fclen = str_strlen(pad, enc); fclen = str_strlen(pad, enc);
asc = is_ascii_string(pad);
if (flen == 0) { if (flen == 0) {
rb_raise(rb_eArgError, "zero width padding"); rb_raise(rb_eArgError, "zero width padding");
} }
@ -5020,7 +5030,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
llen -= fclen; llen -= fclen;
} }
else { else {
char *fp = str_nth(f, f+flen, llen, enc); char *fp = str_nth(f, f+flen, llen, enc, asc);
n = fp - f; n = fp - f;
memcpy(p,f,n); memcpy(p,f,n);
p+=n; p+=n;
@ -5040,7 +5050,7 @@ rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
rlen -= fclen; rlen -= fclen;
} }
else { else {
char *fp = str_nth(f, f+flen, rlen, enc); char *fp = str_nth(f, f+flen, rlen, enc, asc);
n = fp - f; n = fp - f;
memcpy(p,f,n); memcpy(p,f,n);
p+=n; p+=n;