[ruby/yarp] Fix string concat parsing
https://github.com/ruby/yarp/commit/58f839a3eb
This commit is contained in:
parent
77e52735f0
commit
ee885237f6
Notes:
git
2023-08-17 00:48:02 +00:00
@ -198,11 +198,11 @@ ProgramNode(0...608)(
|
||||
)
|
||||
),
|
||||
StringConcatNode(552...560)(
|
||||
StringNode(552...554)((552...553), (553...553), (553...554), ""),
|
||||
StringConcatNode(555...560)(
|
||||
StringNode(555...557)((555...556), (556...556), (556...557), ""),
|
||||
StringNode(558...560)((558...559), (559...559), (559...560), "")
|
||||
)
|
||||
StringConcatNode(552...557)(
|
||||
StringNode(552...554)((552...553), (553...553), (553...554), ""),
|
||||
StringNode(555...557)((555...556), (556...556), (556...557), "")
|
||||
),
|
||||
StringNode(558...560)((558...559), (559...559), (559...560), "")
|
||||
),
|
||||
StringConcatNode(562...574)(
|
||||
InterpolatedStringNode(562...570)(
|
||||
|
240
yarp/yarp.c
240
yarp/yarp.c
@ -11950,95 +11950,115 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
||||
return (yp_node_t *) node;
|
||||
}
|
||||
case YP_TOKEN_STRING_BEGIN: {
|
||||
assert(parser->lex_modes.current->mode == YP_LEX_STRING);
|
||||
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
|
||||
yp_node_t *result = NULL;
|
||||
|
||||
yp_token_t opening = parser->current;
|
||||
parser_lex(parser);
|
||||
while (match_type_p(parser, YP_TOKEN_STRING_BEGIN)) {
|
||||
assert(parser->lex_modes.current->mode == YP_LEX_STRING);
|
||||
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
|
||||
|
||||
yp_node_t *node;
|
||||
|
||||
if (accept(parser, YP_TOKEN_STRING_END)) {
|
||||
// If we get here, then we have an end immediately after a start. In
|
||||
// that case we'll create an empty content token and return an
|
||||
// uninterpolated string.
|
||||
yp_token_t content = (yp_token_t) {
|
||||
.type = YP_TOKEN_STRING_CONTENT,
|
||||
.start = parser->previous.start,
|
||||
.end = parser->previous.start
|
||||
};
|
||||
|
||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
|
||||
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||
// If we get here, then we have an end of a label immediately after a
|
||||
// start. In that case we'll create an empty symbol node.
|
||||
yp_token_t opening = not_provided(parser);
|
||||
yp_token_t content = (yp_token_t) {
|
||||
.type = YP_TOKEN_STRING_CONTENT,
|
||||
.start = parser->previous.start,
|
||||
.end = parser->previous.start
|
||||
};
|
||||
|
||||
return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
|
||||
} else if (!lex_interpolation) {
|
||||
// If we don't accept interpolation then we expect the string to start
|
||||
// with a single string content node.
|
||||
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
|
||||
yp_token_t content = parser->previous;
|
||||
|
||||
// It is unfortunately possible to have multiple string content nodes in
|
||||
// a row in the case that there's heredoc content in the middle of the
|
||||
// string, like this cursed example:
|
||||
//
|
||||
// <<-END+'b
|
||||
// a
|
||||
// END
|
||||
// c'+'d'
|
||||
//
|
||||
// In that case we need to switch to an interpolated string to be able
|
||||
// to contain all of the parts.
|
||||
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
||||
|
||||
yp_token_t delimiters = not_provided(parser);
|
||||
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
|
||||
yp_node_list_append(&parts, part);
|
||||
|
||||
while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||
part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
|
||||
yp_node_list_append(&parts, part);
|
||||
}
|
||||
|
||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
|
||||
return (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||
}
|
||||
|
||||
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||
}
|
||||
|
||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
|
||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
|
||||
} else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||
// In this case we've hit string content so we know the string at
|
||||
// least has something in it. We'll need to check if the following
|
||||
// token is the end (in which case we can return a plain string) or if
|
||||
// it's not then it has interpolation.
|
||||
yp_token_t content = parser->current;
|
||||
yp_node_t *node = NULL;
|
||||
yp_token_t opening = parser->current;
|
||||
parser_lex(parser);
|
||||
|
||||
if (accept(parser, YP_TOKEN_STRING_END)) {
|
||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||
// If we get here, then we have an end immediately after a
|
||||
// start. In that case we'll create an empty content token
|
||||
// and return an uninterpolated string.
|
||||
yp_token_t content = (yp_token_t) {
|
||||
.type = YP_TOKEN_STRING_CONTENT,
|
||||
.start = parser->previous.start,
|
||||
.end = parser->previous.start
|
||||
};
|
||||
|
||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
|
||||
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||
// If we get here, then we have an end of a label
|
||||
// immediately after a start. In that case we'll create an
|
||||
// empty symbol node.
|
||||
yp_token_t opening = not_provided(parser);
|
||||
yp_token_t content = (yp_token_t) {
|
||||
.type = YP_TOKEN_STRING_CONTENT,
|
||||
.start = parser->previous.start,
|
||||
.end = parser->previous.start
|
||||
};
|
||||
|
||||
node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
|
||||
} else if (!lex_interpolation) {
|
||||
// If we don't accept interpolation then we expect the
|
||||
// string to start with a single string content node.
|
||||
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
|
||||
yp_token_t content = parser->previous;
|
||||
|
||||
// It is unfortunately possible to have multiple string
|
||||
// content nodes in a row in the case that there's heredoc
|
||||
// content in the middle of the string, like this cursed
|
||||
// example:
|
||||
//
|
||||
// <<-END+'b
|
||||
// a
|
||||
// END
|
||||
// c'+'d'
|
||||
//
|
||||
// In that case we need to switch to an interpolated string
|
||||
// to be able to contain all of the parts.
|
||||
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
||||
|
||||
yp_token_t delimiters = not_provided(parser);
|
||||
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
|
||||
yp_node_list_append(&parts, part);
|
||||
|
||||
while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||
part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
|
||||
yp_node_list_append(&parts, part);
|
||||
}
|
||||
|
||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
|
||||
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||
node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||
} else {
|
||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
|
||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
|
||||
}
|
||||
} else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||
// In this case we've hit string content so we know the string at
|
||||
// least has something in it. We'll need to check if the following
|
||||
// token is the end (in which case we can return a plain string) or if
|
||||
// it's not then it has interpolation.
|
||||
yp_token_t content = parser->current;
|
||||
parser_lex(parser);
|
||||
|
||||
if (accept(parser, YP_TOKEN_STRING_END)) {
|
||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||
node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||
} else {
|
||||
// If we get here, then we have interpolation so we'll need to create
|
||||
// a string or symbol node with interpolation.
|
||||
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
||||
yp_token_t string_opening = not_provided(parser);
|
||||
yp_token_t string_closing = not_provided(parser);
|
||||
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
|
||||
yp_node_list_append(&parts, part);
|
||||
|
||||
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
|
||||
yp_node_t *part = parse_string_part(parser);
|
||||
if (part != NULL) yp_node_list_append(&parts, part);
|
||||
}
|
||||
|
||||
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||
node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
||||
} else {
|
||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
|
||||
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If we get here, then we have interpolation so we'll need to create
|
||||
// a string or symbol node with interpolation.
|
||||
// If we get here, then the first part of the string is not plain string
|
||||
// content, in which case we need to parse the string as an interpolated
|
||||
// string.
|
||||
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
||||
yp_token_t string_opening = not_provided(parser);
|
||||
yp_token_t string_closing = not_provided(parser);
|
||||
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
|
||||
yp_node_list_append(&parts, part);
|
||||
|
||||
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
|
||||
yp_node_t *part = parse_string_part(parser);
|
||||
@ -12046,43 +12066,39 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
||||
}
|
||||
|
||||
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||
return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
||||
node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
||||
} else {
|
||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
|
||||
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||
}
|
||||
}
|
||||
|
||||
if (result == NULL) {
|
||||
// If the node we just parsed is a symbol node, then we
|
||||
// can't concatenate it with anything else, so we can now
|
||||
// return that node.
|
||||
if (YP_NODE_TYPE_P(node, YP_NODE_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_SYMBOL_NODE)) {
|
||||
return node;
|
||||
}
|
||||
|
||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
|
||||
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||
}
|
||||
} else {
|
||||
// If we get here, then the first part of the string is not plain string
|
||||
// content, in which case we need to parse the string as an interpolated
|
||||
// string.
|
||||
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
||||
// If we don't already have a node, then it's fine and we
|
||||
// can just set the result to be the node we just parsed.
|
||||
result = node;
|
||||
} else {
|
||||
// Otherwise we need to check the type of the node we just
|
||||
// parsed. If it cannot be concatenated with the previous
|
||||
// node, then we'll need to add a syntax error.
|
||||
if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_STRING_NODE)) {
|
||||
yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Unexpected string concatenation.");
|
||||
}
|
||||
|
||||
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
|
||||
yp_node_t *part = parse_string_part(parser);
|
||||
if (part != NULL) yp_node_list_append(&parts, part);
|
||||
// Either way we will create a concat node to hold the
|
||||
// strings together.
|
||||
result = (yp_node_t *) yp_string_concat_node_create(parser, result, node);
|
||||
}
|
||||
|
||||
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||
return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
||||
}
|
||||
|
||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
|
||||
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||
}
|
||||
|
||||
// If there's a string immediately following this string, then it's a
|
||||
// concatenatation. In this case we'll parse the next string and create a
|
||||
// node in the tree that concatenates the two strings.
|
||||
if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
|
||||
return (yp_node_t *) yp_string_concat_node_create(
|
||||
parser,
|
||||
node,
|
||||
parse_expression(parser, YP_BINDING_POWER_CALL, "Expected string on the right side of concatenation.")
|
||||
);
|
||||
} else {
|
||||
return node;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
case YP_TOKEN_SYMBOL_BEGIN: {
|
||||
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
|
||||
|
Loading…
x
Reference in New Issue
Block a user