[ruby/yarp] Provide slow path for finding ) in regexp when encoding changes

https://github.com/ruby/yarp/commit/c1064a65e3
This commit is contained in:
Kevin Newton 2023-08-03 13:38:54 -04:00 committed by Takashi Kokubun
parent 0004565a91
commit 0b6a26abff
Notes: git 2023-08-17 00:48:06 +00:00

View File

@ -379,13 +379,36 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
switch (*parser->cursor) {
case '#': { // inline comments
if (parser->encoding_changed && parser->encoding->multibyte) {
// Here we're going to take a slow path and iterate through
// each multibyte character to find the close paren. We do
// this because \ can be a trailing byte in some encodings.
while (parser->cursor < parser->end) {
if (*parser->cursor == ')') {
parser->cursor++;
return true;
}
size_t width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
if (width == 0) return false;
parser->cursor += width;
}
return false;
} else {
// Here we can take the fast path and use memchr to find the
// next ) because we are safe checking backward for \ since
// it cannot be a trailing character.
bool found = yp_regexp_char_find(parser, ')');
// If the close paren we found is escaped, we need to find another.
while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
found = yp_regexp_char_find(parser, ')');
}
return found;
}
}
case ':': // non-capturing group
case '=': // positive lookahead
case '!': // negative lookahead