Improve String#rindex performance on OSX
On OSX, String#rindex is slow due to the lack of `memrchr`. The fallback implementation instead steps backwards through the search string one character at a time, calling `memcmp` at every position to look for a substring match. For OSX hosts, this changeset introduces a simple `memrchr` implementation, `rb_memrchr`, that can be used instead. The example benchmark below uses an 8000-character search string with a 10-character substring near the end.

```
ruby-master | substring near the end | osx
UTF-8
            user     system      total        real
index   0.000111   0.000000   0.000111 (  0.000110)
rindex  0.000446   0.000005   0.000451 (  0.000454)
```

```
ruby-patched | substring near the end | osx
UTF-8
            user     system      total        real
index   0.000112   0.000000   0.000112 (  0.000111)
rindex  0.000057   0.000001   0.000058 (  0.000057)
```
This commit is contained in:
parent
5fd3942466
commit
e7cb70be4e
Notes:
git
2024-09-03 05:25:43 +00:00
41
string.c
41
string.c
@ -4345,7 +4345,19 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
|
|||||||
return Qnil;
|
return Qnil;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_MEMRCHR
|
#ifndef HAVE_MEMRCHR
|
||||||
|
/*
 * Fallback used when the C library does not provide memrchr().
 *
 * Scans the search_len bytes starting at search_str from the last byte
 * towards the first and returns a pointer to the last byte equal to chr
 * (chr is compared as an unsigned char).  Returns a null pointer when no
 * byte matches or when search_len is not positive.
 */
static void*
memrchr(const char *search_str, int chr, long search_len)
{
    const unsigned char target = (unsigned char)chr;
    const char *ptr = search_str + search_len;

    /*
     * Check the bound *before* stepping back: the previous do/while form
     * decremented first and looped while ptr >= search_str, so it examined
     * search_str[-1] after the first byte (and right away for an empty
     * range), reading outside the buffer.
     */
    while (ptr > search_str) {
        if ((unsigned char)*(--ptr) == target) return (void *)ptr;
    }

    return ((void *)0);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static long
|
static long
|
||||||
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
||||||
{
|
{
|
||||||
@ -4362,6 +4374,10 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
|||||||
c = *t & 0xff;
|
c = *t & 0xff;
|
||||||
searchlen = s - sbeg + 1;
|
searchlen = s - sbeg + 1;
|
||||||
|
|
||||||
|
if (memcmp(s, t, slen) == 0) {
|
||||||
|
return s - sbeg;
|
||||||
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
hit = memrchr(sbeg, c, searchlen);
|
hit = memrchr(sbeg, c, searchlen);
|
||||||
if (!hit) break;
|
if (!hit) break;
|
||||||
@ -4377,29 +4393,6 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
|||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
static long
|
|
||||||
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
|
||||||
{
|
|
||||||
long slen;
|
|
||||||
char *sbeg, *e, *t;
|
|
||||||
|
|
||||||
sbeg = RSTRING_PTR(str);
|
|
||||||
e = RSTRING_END(str);
|
|
||||||
t = RSTRING_PTR(sub);
|
|
||||||
slen = RSTRING_LEN(sub);
|
|
||||||
|
|
||||||
while (s) {
|
|
||||||
if (memcmp(s, t, slen) == 0) {
|
|
||||||
return s - sbeg;
|
|
||||||
}
|
|
||||||
if (s <= sbeg) break;
|
|
||||||
s = rb_enc_prev_char(sbeg, s, e, enc);
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* found index in byte */
|
/* found index in byte */
|
||||||
static long
|
static long
|
||||||
|
Loading…
x
Reference in New Issue
Block a user