From 0004565a919fde84def202432ef04cee0f7b689e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 3 Aug 2023 13:25:38 -0400 Subject: [PATCH] [ruby/yarp] Use yp_memchr in regexp parsing https://github.com/ruby/yarp/commit/08081dd24f --- yarp/extension.c | 2 +- yarp/regexp.c | 15 ++++++++++----- yarp/regexp.h | 4 +++- yarp/yarp.c | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/yarp/extension.c b/yarp/extension.c index d4ce3625d8..4f2065a81b 100644 --- a/yarp/extension.c +++ b/yarp/extension.c @@ -379,7 +379,7 @@ named_captures(VALUE self, VALUE source) { yp_string_list_t string_list; yp_string_list_init(&string_list); - if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) { + if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) { yp_string_list_free(&string_list); return Qnil; } diff --git a/yarp/regexp.c b/yarp/regexp.c index 4855859442..40d85c5227 100644 --- a/yarp/regexp.c +++ b/yarp/regexp.c @@ -6,16 +6,20 @@ typedef struct { const char *cursor; const char *end; yp_string_list_t *named_captures; + bool encoding_changed; + yp_encoding_t *encoding; } yp_regexp_parser_t; // This initializes a new parser with the given source. static void -yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures) { +yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) { *parser = (yp_regexp_parser_t) { .start = start, .cursor = start, .end = end, - .named_captures = named_captures + .named_captures = named_captures, + .encoding_changed = encoding_changed, + .encoding = encoding }; } @@ -60,7 +64,8 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) { if (yp_regexp_char_is_eof(parser)) { return false; } - const char *end = (const char *) memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor)); + + const char *end = (const char *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding); if (end == NULL) { return false; } @@ -542,8 +547,8 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) { // Parse a regular expression and extract the names of all of the named capture // groups. YP_EXPORTED_FUNCTION bool -yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures) { +yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) { yp_regexp_parser_t parser; - yp_regexp_parser_init(&parser, source, source + size, named_captures); + yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding); return yp_regexp_parse_pattern(&parser); } diff --git a/yarp/regexp.h b/yarp/regexp.h index cf624db6b8..5a2f13047e 100644 --- a/yarp/regexp.h +++ b/yarp/regexp.h @@ -3,6 +3,8 @@ #include "yarp/defines.h" #include "yarp/parser.h" +#include "yarp/enc/yp_encoding.h" +#include "yarp/util/yp_memchr.h" #include "yarp/util/yp_string_list.h" #include "yarp/util/yp_string.h" @@ -12,6 +14,6 @@ // Parse a regular expression and extract the names of all of the named capture // groups. -YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures); +YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding); #endif diff --git a/yarp/yarp.c b/yarp/yarp.c index dd27c172f4..a8ff6c3859 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -12535,7 +12535,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc; - if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures)) { + if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) { for (size_t index = 0; index < named_captures.length; index++) { yp_string_t *name = &named_captures.strings[index]; assert(name->type == YP_STRING_SHARED);