From e7cb70be4eb7411204f73ee748e317fefaa0410a Mon Sep 17 00:00:00 2001
From: Zack Deveau
Date: Fri, 23 Aug 2024 13:18:44 -0400
Subject: [PATCH] Improve String#rindex performance on OSX

On OSX, String#rindex is slow due to the lack of `memrchr`.
The fallback implementation finds a match by instead doing a
`memcmp` at every single character position in the search
string while looking for a substring match.

For OSX hosts, this changeset introduces a simple fallback
`memrchr` implementation that can be used instead.

An example benchmark below demonstrates an 8000 char long
search string with a 10 char substring near the end.

```
ruby-master | substring near the end | osx
UTF-8
       user     system      total        real
index  0.000111   0.000000   0.000111 (  0.000110)
rindex 0.000446   0.000005   0.000451 (  0.000454)
```

```
ruby-patched | substring near the end | osx
UTF-8
       user     system      total        real
index  0.000112   0.000000   0.000112 (  0.000111)
rindex 0.000057   0.000001   0.000058 (  0.000057)
```
---
 string.c | 43 +++++++++++++++++++------------------------
 1 file changed, 19 insertions(+), 24 deletions(-)

diff --git a/string.c b/string.c
index 6a6b4c8b29..025382c0f4 100644
--- a/string.c
+++ b/string.c
@@ -4345,7 +4345,21 @@ rb_str_byteindex_m(int argc, VALUE *argv, VALUE str)
     return Qnil;
 }
 
-#ifdef HAVE_MEMRCHR
+#ifndef HAVE_MEMRCHR
+/* Fallback memrchr(3): last byte equal to chr in search_str[0, search_len),
+ * or NULL when there is none; never reads outside that range. */
+static void*
+memrchr(const char *search_str, int chr, long search_len)
+{
+    const char *ptr = search_str + search_len;
+    while (ptr > search_str) {
+        if ((unsigned char)*(--ptr) == (unsigned char)chr) return (void *)ptr;
+    }
+
+    return NULL;
+}
+#endif
+
 static long
 str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
 {
@@ -4362,6 +4376,10 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
     c = *t & 0xff;
     searchlen = s - sbeg + 1;
 
+    if (memcmp(s, t, slen) == 0) {
+        return s - sbeg;
+    }
+
     do {
         hit = memrchr(sbeg, c, searchlen);
         if (!hit) break;
@@ -4377,29 +4395,6 @@ str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
 
     return -1;
 }
-#else
-static long
-str_rindex(VALUE str, VALUE sub, const char *s, rb_encoding *enc)
-{
-    long slen;
-    char *sbeg, *e, *t;
-
-    sbeg = RSTRING_PTR(str);
-    e = RSTRING_END(str);
-    t = RSTRING_PTR(sub);
-    slen = RSTRING_LEN(sub);
-
-    while (s) {
-        if (memcmp(s, t, slen) == 0) {
-            return s - sbeg;
-        }
-        if (s <= sbeg) break;
-        s = rb_enc_prev_char(sbeg, s, e, enc);
-    }
-
-    return -1;
-}
-#endif
 
 /* found index in byte */
 static long