From 0b6a26abff1a15103f4a3080619ea28b40774324 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Thu, 3 Aug 2023 13:38:54 -0400
Subject: [PATCH] [ruby/yarp] Provide slow path for finding ) in regexp when
 encoding changes

https://github.com/ruby/yarp/commit/c1064a65e3
---
 yarp/regexp.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/yarp/regexp.c b/yarp/regexp.c
index 40d85c5227..ce148c82f2 100644
--- a/yarp/regexp.c
+++ b/yarp/regexp.c
@@ -379,12 +379,35 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
 
         switch (*parser->cursor) {
             case '#': { // inline comments
-                bool found = yp_regexp_char_find(parser, ')');
-                // the close paren we found is escaped, we need to find another
-                while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
-                    found = yp_regexp_char_find(parser, ')');
+                if (parser->encoding_changed && parser->encoding->multibyte) {
+                    // Here we're going to take a slow path and iterate through
+                    // each multibyte character to find the close paren. We do
+                    // this because \ can be a trailing byte in some encodings.
+                    while (parser->cursor < parser->end) {
+                        if (*parser->cursor == ')') {
+                            parser->cursor++;
+                            return true;
+                        }
+
+                        size_t width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
+                        if (width == 0) return false;
+
+                        parser->cursor += width;
+                    }
+
+                    return false;
+                } else {
+                    // Here we can take the fast path and use memchr to find the
+                    // next ) because it is safe to check backward for \ here: it
+                    // cannot be the trailing byte of a multibyte character.
+                    bool found = yp_regexp_char_find(parser, ')');
+
+                    while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
+                        found = yp_regexp_char_find(parser, ')');
+                    }
+
+                    return found;
                 }
-                return found;
             }
             case ':': // non-capturing group
             case '=': // positive lookahead