Improve String#rindex performance on OSX

On OSX, String#rindex is slow because the platform lacks `memrchr`.
The fallback implementation instead looks for a substring match by
calling `memcmp` at every single character position of the search
string, stepping backwards one character at a time.

For OSX hosts, this changeset introduces a simple fallback `memrchr`
implementation, built only when `HAVE_MEMRCHR` is undefined, that can
be used instead. The example benchmark below searches an 8000-character
string for a 10-character substring located near the end.

```
ruby-master | substring near the end | osx

UTF-8
       user     system      total        real
index  0.000111   0.000000   0.000111 (  0.000110)
rindex  0.000446   0.000005   0.000451 (  0.000454)
```

```
ruby-patched | substring near the end | osx

UTF-8
       user     system      total        real
index  0.000112   0.000000   0.000112 (  0.000111)
rindex  0.000057   0.000001   0.000058 (  0.000057)
```
This commit is contained in:
Zack Deveau 2024-08-23 13:18:44 -04:00 committed by Nobuyoshi Nakada
parent 5fd3942466
commit e7cb70be4e
Notes: git 2024-09-03 05:25:43 +00:00

View File

@ -4345,7 +4345,19 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
return Qnil;
}
#ifdef HAVE_MEMRCHR
#ifndef HAVE_MEMRCHR
/*
 * Fallback memrchr for platforms whose libc does not provide one.
 *
 * Scans the search_len bytes starting at search_str from the last byte
 * towards the first.  Returns a pointer to the last byte equal to chr
 * (converted to unsigned char, as memchr does), or a null pointer when
 * no byte in the range matches or the range is empty.
 */
static void*
memrchr(const char *search_str, int chr, long search_len)
{
    const unsigned char target = (unsigned char)chr;
    const char *ptr = search_str + search_len;

    /* Test the bound before stepping back, so the byte preceding
     * search_str is never read -- not even when search_len is 0. */
    while (ptr > search_str) {
        if ((unsigned char)*(--ptr) == target) return (void *)ptr;
    }
    return ((void *)0);
}
#endif
static long
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
{
@ -4362,6 +4374,10 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
c = *t & 0xff;
searchlen = s - sbeg + 1;
if (memcmp(s, t, slen) == 0) {
return s - sbeg;
}
do {
hit = memrchr(sbeg, c, searchlen);
if (!hit) break;
@ -4377,29 +4393,6 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
return -1;
}
#else
/*
 * Reverse substring search done by direct comparison.
 *
 * Starting at s, compares the bytes of sub against str at the current
 * position; on a mismatch the position is moved back one character with
 * rb_enc_prev_char().  Returns the byte offset of the match from the
 * start of str, or -1 once the start of str has been tried without a
 * match (or no previous character can be obtained).
 */
static long
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
{
    char *const str_head = RSTRING_PTR(str);
    char *const str_tail = RSTRING_END(str);
    char *const needle = RSTRING_PTR(sub);
    const long needle_len = RSTRING_LEN(sub);

    for (; s; s = rb_enc_prev_char(str_head, s, str_tail, enc)) {
        if (memcmp(s, needle, needle_len) == 0) {
            return s - str_head;
        }
        /* Nothing precedes the first byte: give up. */
        if (s <= str_head) {
            break;
        }
    }
    return -1;
}
#endif
/* found index in byte */
static long