Improve String#rindex performance on OSX
On OSX, String#rindex is slow because the platform lacks `memrchr`. The fallback implementation instead looks for a match by running `memcmp` at every single character position of the searched string. For OSX hosts, this changeset introduces a simple `memrchr` implementation, `rb_memrchr`, that can be used instead. The example benchmark below searches an 8000-character string for a 10-character substring located near the end.

```
ruby-master | substring near the end | osx
UTF-8       user     system      total        real
index   0.000111   0.000000   0.000111 (  0.000110)
rindex  0.000446   0.000005   0.000451 (  0.000454)
```

```
ruby-patched | substring near the end | osx
UTF-8       user     system      total        real
index   0.000112   0.000000   0.000112 (  0.000111)
rindex  0.000057   0.000001   0.000058 (  0.000057)
```
This commit is contained in:
parent
5fd3942466
commit
e7cb70be4e
Notes:
git
2024-09-03 05:25:43 +00:00
41
string.c
41
string.c
@ -4345,7 +4345,19 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
#ifdef HAVE_MEMRCHR
|
||||
#ifndef HAVE_MEMRCHR
|
||||
/*
 * Fallback memrchr for platforms whose libc does not provide one.
 *
 * Scans the first search_len bytes of search_str backwards and returns a
 * pointer to the last byte equal to chr (compared as unsigned char), or a
 * null pointer when no such byte exists or search_len is not positive.
 */
static void*
memrchr(const char *search_str, int chr, long search_len)
{
    /* Compare as unsigned char on both sides, like the libc memrchr does. */
    const unsigned char target = (unsigned char)chr;
    const char *ptr = search_str + search_len;

    /*
     * Test the bound BEFORE stepping back: this guarantees that ptr is
     * never decremented past search_str, so no byte in front of the buffer
     * is ever read, and an empty range is handled without any dereference.
     */
    while (ptr > search_str) {
        if ((unsigned char)*(--ptr) == target) return (void *)ptr;
    }

    return ((void *)0);
}
|
||||
#endif
|
||||
|
||||
static long
|
||||
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
||||
{
|
||||
@ -4362,6 +4374,10 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
||||
c = *t & 0xff;
|
||||
searchlen = s - sbeg + 1;
|
||||
|
||||
if (memcmp(s, t, slen) == 0) {
|
||||
return s - sbeg;
|
||||
}
|
||||
|
||||
do {
|
||||
hit = memrchr(sbeg, c, searchlen);
|
||||
if (!hit) break;
|
||||
@ -4377,29 +4393,6 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
||||
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
static long
|
||||
str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
|
||||
{
|
||||
long slen;
|
||||
char *sbeg, *e, *t;
|
||||
|
||||
sbeg = RSTRING_PTR(str);
|
||||
e = RSTRING_END(str);
|
||||
t = RSTRING_PTR(sub);
|
||||
slen = RSTRING_LEN(sub);
|
||||
|
||||
while (s) {
|
||||
if (memcmp(s, t, slen) == 0) {
|
||||
return s - sbeg;
|
||||
}
|
||||
if (s <= sbeg) break;
|
||||
s = rb_enc_prev_char(sbeg, s, e, enc);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* found index in byte */
|
||||
static long
|
||||
|
Loading…
x
Reference in New Issue
Block a user