* string.c (str_nth_len, str_utf8_nth): also return the remaining length, via the nth pointer argument.

* string.c (rb_str_substr): avoid always measuring the string length,
  to improve performance for huge strings.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30635 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2011-01-22 23:00:12 +00:00
parent 18e3c52bc4
commit b0eaf0f621
2 changed files with 41 additions and 16 deletions

View File

@ -1,3 +1,10 @@
Sun Jan 23 08:00:09 2011 Nobuyoshi Nakada <nobu@ruby-lang.org>
* string.c (str_nth_len, str_utf8_nth): also return the remaining length, via the nth pointer argument.
* string.c (rb_str_substr): avoid always measuring the string length,
to improve performance for huge strings.
Sun Jan 23 00:40:10 2011 KOSAKI Motohiro <kosaki.motohiro@gmail.com> Sun Jan 23 00:40:10 2011 KOSAKI Motohiro <kosaki.motohiro@gmail.com>
* test/test_syslog.rb: Fix to make a lot of test failure if * test/test_syslog.rb: Fix to make a lot of test failure if

View File

@ -1420,9 +1420,10 @@ rb_str_s_try_convert(VALUE dummy, VALUE str)
return rb_check_string_type(str); return rb_check_string_type(str);
} }
char* static char*
rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc) str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
{ {
long nth = *nthp;
if (rb_enc_mbmaxlen(enc) == 1) { if (rb_enc_mbmaxlen(enc) == 1) {
p += nth; p += nth;
} }
@ -1435,12 +1436,16 @@ rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
while (p < e && 0 < nth) { while (p < e && 0 < nth) {
e2 = p + nth; e2 = p + nth;
if (e < e2) if (e < e2) {
*nthp = nth;
return (char *)e; return (char *)e;
}
if (ISASCII(*p)) { if (ISASCII(*p)) {
p2 = search_nonascii(p, e2); p2 = search_nonascii(p, e2);
if (!p2) if (!p2) {
*nthp = nth;
return (char *)e2; return (char *)e2;
}
nth -= p2 - p; nth -= p2 - p;
p = p2; p = p2;
} }
@ -1448,8 +1453,10 @@ rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
p += n; p += n;
nth--; nth--;
} }
if (nth != 0) *nthp = nth;
if (nth != 0) {
return (char *)e; return (char *)e;
}
return (char *)p; return (char *)p;
} }
else { else {
@ -1458,16 +1465,23 @@ rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
} }
} }
if (p > e) p = e; if (p > e) p = e;
*nthp = nth;
return (char*)p; return (char*)p;
} }
char*
rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
{
return str_nth_len(p, e, &nth, enc);
}
static char* static char*
str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte) str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
{ {
if (singlebyte) if (singlebyte)
p += nth; p += nth;
else { else {
p = rb_enc_nth(p, e, nth, enc); p = str_nth_len(p, e, &nth, enc);
} }
if (!p) return 0; if (!p) return 0;
if (p > e) p = e; if (p > e) p = e;
@ -1492,8 +1506,9 @@ rb_str_offset(VALUE str, long pos)
#ifdef NONASCII_MASK #ifdef NONASCII_MASK
static char * static char *
str_utf8_nth(const char *p, const char *e, long nth) str_utf8_nth(const char *p, const char *e, long *nthp)
{ {
long nth = *nthp;
if ((int)SIZEOF_VALUE < e - p && (int)SIZEOF_VALUE * 2 < nth) { if ((int)SIZEOF_VALUE < e - p && (int)SIZEOF_VALUE * 2 < nth) {
const VALUE *s, *t; const VALUE *s, *t;
const VALUE lowbits = sizeof(VALUE) - 1; const VALUE lowbits = sizeof(VALUE) - 1;
@ -1516,13 +1531,14 @@ str_utf8_nth(const char *p, const char *e, long nth)
} }
p++; p++;
} }
*nthp = nth;
return (char *)p; return (char *)p;
} }
static long static long
str_utf8_offset(const char *p, const char *e, long nth) str_utf8_offset(const char *p, const char *e, long nth)
{ {
const char *pp = str_utf8_nth(p, e, nth); const char *pp = str_utf8_nth(p, e, &nth);
return pp - p; return pp - p;
} }
#endif #endif
@ -1603,16 +1619,18 @@ rb_str_substr(VALUE str, long beg, long len)
if (beg < 0) return Qnil; if (beg < 0) return Qnil;
} }
} }
else if (beg > 0 && beg > str_strlen(str, enc)) { else if (beg > 0 && beg > RSTRING_LEN(str)) {
return Qnil; return Qnil;
} }
if (len == 0) { if (len == 0) {
if (beg > str_strlen(str, enc)) return Qnil;
p = 0; p = 0;
} }
#ifdef NONASCII_MASK #ifdef NONASCII_MASK
else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID && else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
enc == rb_utf8_encoding()) { enc == rb_utf8_encoding()) {
p = str_utf8_nth(s, e, beg); p = str_utf8_nth(s, e, &beg);
if (beg > 0) return Qnil;
len = str_utf8_offset(p, e, len); len = str_utf8_offset(p, e, len);
} }
#endif #endif
@ -1621,15 +1639,15 @@ rb_str_substr(VALUE str, long beg, long len)
p = s + beg * char_sz; p = s + beg * char_sz;
if (p > e) { if (p > e) {
p = e; return Qnil;
len = 0;
} }
else if (len * char_sz > e - p) else if (len * char_sz > e - p)
len = e - p; len = e - p;
else else
len *= char_sz; len *= char_sz;
} }
else if ((p = str_nth(s, e, beg, enc, 0)) == e) { else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
if (beg > 0) return Qnil;
len = 0; len = 0;
} }
else { else {