[ruby/yarp] Use yp_memchr in regexp parsing

https://github.com/ruby/yarp/commit/08081dd24f
This commit is contained in:
Kevin Newton 2023-08-03 13:25:38 -04:00 committed by Takashi Kokubun
parent 1ad0d19876
commit 0004565a91
Notes: git 2023-08-17 00:48:06 +00:00
4 changed files with 15 additions and 8 deletions

View File

@ -379,7 +379,7 @@ named_captures(VALUE self, VALUE source) {
yp_string_list_t string_list;
yp_string_list_init(&string_list);
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list)) {
if (!yp_regexp_named_capture_group_names(RSTRING_PTR(source), RSTRING_LEN(source), &string_list, false, &yp_encoding_utf_8)) {
yp_string_list_free(&string_list);
return Qnil;
}

View File

@ -6,16 +6,20 @@ typedef struct {
const char *cursor;
const char *end;
yp_string_list_t *named_captures;
bool encoding_changed;
yp_encoding_t *encoding;
} yp_regexp_parser_t;
// This initializes a new parser with the given source.
static void
yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures) {
yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
*parser = (yp_regexp_parser_t) {
.start = start,
.cursor = start,
.end = end,
.named_captures = named_captures
.named_captures = named_captures,
.encoding_changed = encoding_changed,
.encoding = encoding
};
}
@ -60,7 +64,8 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
if (yp_regexp_char_is_eof(parser)) {
return false;
}
const char *end = (const char *) memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor));
const char *end = (const char *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
if (end == NULL) {
return false;
}
@ -542,8 +547,8 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
// Parse a regular expression and extract the names of all of the named capture
// groups.
YP_EXPORTED_FUNCTION bool
yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures) {
yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
yp_regexp_parser_t parser;
yp_regexp_parser_init(&parser, source, source + size, named_captures);
yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
return yp_regexp_parse_pattern(&parser);
}

View File

@ -3,6 +3,8 @@
#include "yarp/defines.h"
#include "yarp/parser.h"
#include "yarp/enc/yp_encoding.h"
#include "yarp/util/yp_memchr.h"
#include "yarp/util/yp_string_list.h"
#include "yarp/util/yp_string.h"
@ -12,6 +14,6 @@
// Parse a regular expression and extract the names of all of the named capture
// groups.
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures);
YP_EXPORTED_FUNCTION bool yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding);
#endif

View File

@ -12535,7 +12535,7 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures)) {
if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
for (size_t index = 0; index < named_captures.length; index++) {
yp_string_t *name = &named_captures.strings[index];
assert(name->type == YP_STRING_SHARED);