[ruby/yarp] Use yp_memchr in regexp parsing

https://github.com/ruby/yarp/commit/08081dd24f
This commit is contained in:
Kevin Newton 2023-08-03 13:25:38 -04:00 committed by Takashi Kokubun
parent 1ad0d19876
commit 0004565a91
Notes: git 2023-08-17 00:48:06 +00:00
4 changed files with 15 additions and 8 deletions

View File

@@ -379,7 +379,7 @@ named_captures(VALUE self, VALUE source) {
 yp_string_list_t string_list;
 yp_string_list_init(&string_list);
-if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) {
+if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
 yp_string_list_free(&string_list);
 return Qnil;
 }

View File

@@ -6,16 +6,20 @@ typedef struct {
 const char *cursor;
 const char *end;
 yp_string_list_t *named_captures;
+bool encoding_changed;
+yp_encoding_t *encoding;
 } yp_regexp_parser_t;
 // This initializes a new parser with the given source.
 static void
-yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures) {
+yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
 *parser = (yp_regexp_parser_t) {
 .start = start,
 .cursor = start,
 .end = end,
-.named_captures = named_captures
+.named_captures = named_captures,
+.encoding_changed = encoding_changed,
+.encoding = encoding
 };
 }
@@ -60,7 +64,8 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
 if (yp_regexp_char_is_eof(parser)) {
 return false;
 }
-const char *end = (const char *) memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor));
+const char *end = (const char *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
 if (end == NULL) {
 return false;
 }
@@ -542,8 +547,8 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
 // Parse a regular expression and extract the names of all of the named capture
 // groups.
 YP_EXPORTED_FUNCTION bool
-yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures) {
+yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
 yp_regexp_parser_t parser;
-yp_regexp_parser_init(&parser, source, source + size, named_captures);
+yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
 return yp_regexp_parse_pattern(&parser);
 }

View File

@@ -3,6 +3,8 @@
 #include "yarp/defines.h"
 #include "yarp/parser.h"
+#include "yarp/enc/yp_encoding.h"
+#include "yarp/util/yp_memchr.h"
 #include "yarp/util/yp_string_list.h"
 #include "yarp/util/yp_string.h"
@@ -12,6 +14,6 @@
 // Parse a regular expression and extract the names of all of the named capture
 // groups.
-YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures);
+YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);
 #endif

View File

@@ -12535,7 +12535,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
 yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
-if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures)) {
+if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
 for (size_t index = 0; index < named_captures.length; index++) {
 yp_string_t *name = &named_captures.strings[index];
 assert(name->type == YP_STRING_SHARED);