[ruby/prism] Properly support parsing regexp in extended mode
https://github.com/ruby/prism/commit/bedc4585ed
This commit is contained in:
parent
30a8dbc861
commit
8fb2227205
@ -17389,7 +17389,7 @@ parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_
|
|||||||
.shared = unescaped->type == PM_STRING_SHARED
|
.shared = unescaped->type == PM_STRING_SHARED
|
||||||
};
|
};
|
||||||
|
|
||||||
pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), NULL, NULL, parse_regular_expression_error, &error_data);
|
pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -20147,7 +20147,7 @@ parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
|
|||||||
* match write node.
|
* match write node.
|
||||||
*/
|
*/
|
||||||
static pm_node_t *
|
static pm_node_t *
|
||||||
parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call) {
|
parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
|
||||||
parse_regular_expression_named_capture_data_t callback_data = {
|
parse_regular_expression_named_capture_data_t callback_data = {
|
||||||
.parser = parser,
|
.parser = parser,
|
||||||
.call = call,
|
.call = call,
|
||||||
@ -20162,7 +20162,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
|
|||||||
.shared = content->type == PM_STRING_SHARED
|
.shared = content->type == PM_STRING_SHARED
|
||||||
};
|
};
|
||||||
|
|
||||||
pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
|
pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
|
||||||
pm_constant_id_list_free(&callback_data.names);
|
pm_constant_id_list_free(&callback_data.names);
|
||||||
|
|
||||||
if (callback_data.match != NULL) {
|
if (callback_data.match != NULL) {
|
||||||
@ -20657,14 +20657,14 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
|
|||||||
pm_string_t owned;
|
pm_string_t owned;
|
||||||
pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
|
pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
|
||||||
|
|
||||||
result = parse_regular_expression_named_captures(parser, &owned, call);
|
result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
|
||||||
pm_string_free(&owned);
|
pm_string_free(&owned);
|
||||||
}
|
}
|
||||||
} else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
|
} else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
|
||||||
// If we have a regular expression node, then we can just parse
|
// If we have a regular expression node, then we can just parse
|
||||||
// the named captures directly off the unescaped string.
|
// the named captures directly off the unescaped string.
|
||||||
const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
|
const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
|
||||||
result = parse_regular_expression_named_captures(parser, content, call);
|
result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@ -18,6 +18,12 @@ typedef struct {
|
|||||||
/** A pointer to the end of the source that we are parsing. */
|
/** A pointer to the end of the source that we are parsing. */
|
||||||
const uint8_t *end;
|
const uint8_t *end;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not the regular expression currently being parsed is in
|
||||||
|
* extended mode, wherein whitespace is ignored and comments are allowed.
|
||||||
|
*/
|
||||||
|
bool extended_mode;
|
||||||
|
|
||||||
/** Whether the encoding has changed from the default. */
|
/** Whether the encoding has changed from the default. */
|
||||||
bool encoding_changed;
|
bool encoding_changed;
|
||||||
|
|
||||||
@ -418,6 +424,19 @@ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if the given key is set in the options.
*
* More precisely: returns true only when the key lies within the inclusive
* range [PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM,
* PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM] and the slot for that key in
* options->values holds PM_REGEXP_OPTION_STATE_ADDED. Any key outside that
* range yields false. The options are only read, never modified.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
pm_regexp_options_added_p(pm_regexp_options_t *options, uint8_t key) {
|
||||||
|
if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
|
||||||
|
// Rebase the key so that the minimum slot maps to index 0 of options->values.
key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
|
||||||
|
return options->values[key] == PM_REGEXP_OPTION_STATE_ADDED;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keys outside the tracked slot range are never reported as added.
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Groups can have quite a few different patterns for syntax. They basically
|
* Groups can have quite a few different patterns for syntax. They basically
|
||||||
* just wrap a set of expressions, but they can potentially have options after a
|
* just wrap a set of expressions, but they can potentially have options after a
|
||||||
@ -443,6 +462,9 @@ static bool
|
|||||||
pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
||||||
const uint8_t *group_start = parser->cursor;
|
const uint8_t *group_start = parser->cursor;
|
||||||
|
|
||||||
|
pm_regexp_options_t options;
|
||||||
|
pm_regexp_options_init(&options);
|
||||||
|
|
||||||
// First, parse any options for the group.
|
// First, parse any options for the group.
|
||||||
if (pm_regexp_char_accept(parser, '?')) {
|
if (pm_regexp_char_accept(parser, '?')) {
|
||||||
if (pm_regexp_char_is_eof(parser)) {
|
if (pm_regexp_char_is_eof(parser)) {
|
||||||
@ -450,9 +472,6 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
pm_regexp_options_t options;
|
|
||||||
pm_regexp_options_init(&options);
|
|
||||||
|
|
||||||
switch (*parser->cursor) {
|
switch (*parser->cursor) {
|
||||||
case '#': { // inline comments
|
case '#': { // inline comments
|
||||||
parser->cursor++;
|
parser->cursor++;
|
||||||
@ -560,6 +579,15 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are at the end of the group of options and there is no
|
||||||
|
// subexpression, then we are going to be setting the options
|
||||||
|
// for the parent group. In this case we are safe to return now.
|
||||||
|
if (*parser->cursor == ')') {
|
||||||
|
if (pm_regexp_options_added_p(&options, 'x')) parser->extended_mode = true;
|
||||||
|
parser->cursor++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// If we hit a -, then we're done parsing options.
|
// If we hit a -, then we're done parsing options.
|
||||||
if (*parser->cursor != '-') break;
|
if (*parser->cursor != '-') break;
|
||||||
|
|
||||||
@ -577,6 +605,16 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
|||||||
if (pm_regexp_char_is_eof(parser)) {
|
if (pm_regexp_char_is_eof(parser)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are at the end of the group of options and there is no
|
||||||
|
// subexpression, then we are going to be setting the options
|
||||||
|
// for the parent group. In this case we are safe to return now.
|
||||||
|
if (*parser->cursor == ')') {
|
||||||
|
if (pm_regexp_options_added_p(&options, 'x')) parser->extended_mode = true;
|
||||||
|
parser->cursor++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
parser->cursor++;
|
parser->cursor++;
|
||||||
@ -585,15 +623,22 @@ pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool extended_mode = parser->extended_mode;
|
||||||
|
if (pm_regexp_options_added_p(&options, 'x')) {
|
||||||
|
parser->extended_mode = true;
|
||||||
|
}
|
||||||
|
|
||||||
// Now, parse the expressions within this group.
|
// Now, parse the expressions within this group.
|
||||||
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
|
while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
|
||||||
if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
|
if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
|
||||||
|
parser->extended_mode = extended_mode;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
pm_regexp_char_accept(parser, '|');
|
pm_regexp_char_accept(parser, '|');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finally, make sure we have a closing parenthesis.
|
// Finally, make sure we have a closing parenthesis.
|
||||||
|
parser->extended_mode = extended_mode;
|
||||||
if (pm_regexp_char_expect(parser, ')')) return true;
|
if (pm_regexp_char_expect(parser, ')')) return true;
|
||||||
|
|
||||||
pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
|
pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
|
||||||
@ -641,6 +686,12 @@ pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
|
|||||||
parser->cursor++;
|
parser->cursor++;
|
||||||
pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "unmatched close parenthesis");
|
pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "unmatched close parenthesis");
|
||||||
return true;
|
return true;
|
||||||
|
case '#':
|
||||||
|
if (parser->extended_mode) {
|
||||||
|
if (!pm_regexp_char_find(parser, '\n')) parser->cursor = parser->end;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/* fallthrough */
|
||||||
default: {
|
default: {
|
||||||
size_t width;
|
size_t width;
|
||||||
if (!parser->encoding_changed) {
|
if (!parser->encoding_changed) {
|
||||||
@ -702,12 +753,13 @@ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
|
|||||||
* groups.
|
* groups.
|
||||||
*/
|
*/
|
||||||
PRISM_EXPORTED_FUNCTION void
|
PRISM_EXPORTED_FUNCTION void
|
||||||
pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
|
pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
|
||||||
pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
|
pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
|
||||||
.parser = parser,
|
.parser = parser,
|
||||||
.start = source,
|
.start = source,
|
||||||
.cursor = source,
|
.cursor = source,
|
||||||
.end = source + size,
|
.end = source + size,
|
||||||
|
.extended_mode = extended_mode,
|
||||||
.encoding_changed = parser->encoding_changed,
|
.encoding_changed = parser->encoding_changed,
|
||||||
.encoding = parser->encoding,
|
.encoding = parser->encoding,
|
||||||
.name_callback = name_callback,
|
.name_callback = name_callback,
|
||||||
|
@ -32,11 +32,12 @@ typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *
|
|||||||
* @param parser The parser that is currently being used.
|
* @param parser The parser that is currently being used.
|
||||||
* @param source The source code to parse.
|
* @param source The source code to parse.
|
||||||
* @param size The size of the source code.
|
* @param size The size of the source code.
|
||||||
|
* @param extended_mode Whether to parse the regular expression in extended mode.
|
||||||
* @param name_callback The optional callback to call when a named capture group is found.
|
* @param name_callback The optional callback to call when a named capture group is found.
|
||||||
* @param name_data The optional data to pass to the name callback.
|
* @param name_data The optional data to pass to the name callback.
|
||||||
* @param error_callback The callback to call when a parse error is found.
|
* @param error_callback The callback to call when a parse error is found.
|
||||||
* @param error_data The data to pass to the error callback.
|
* @param error_data The data to pass to the error callback.
|
||||||
*/
|
*/
|
||||||
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
|
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user