[ruby/yarp] Fix string concat parsing
https://github.com/ruby/yarp/commit/58f839a3eb
This commit is contained in:
parent
77e52735f0
commit
ee885237f6
Notes:
git
2023-08-17 00:48:02 +00:00
@@ -198,11 +198,11 @@ ProgramNode(0...608)(
|
|||||||
)
|
)
|
||||||
),
|
),
|
||||||
StringConcatNode(552...560)(
|
StringConcatNode(552...560)(
|
||||||
StringNode(552...554)((552...553), (553...553), (553...554), ""),
|
StringConcatNode(552...557)(
|
||||||
StringConcatNode(555...560)(
|
StringNode(552...554)((552...553), (553...553), (553...554), ""),
|
||||||
StringNode(555...557)((555...556), (556...556), (556...557), ""),
|
StringNode(555...557)((555...556), (556...556), (556...557), "")
|
||||||
StringNode(558...560)((558...559), (559...559), (559...560), "")
|
),
|
||||||
)
|
StringNode(558...560)((558...559), (559...559), (559...560), "")
|
||||||
),
|
),
|
||||||
StringConcatNode(562...574)(
|
StringConcatNode(562...574)(
|
||||||
InterpolatedStringNode(562...570)(
|
InterpolatedStringNode(562...570)(
|
||||||
|
240
yarp/yarp.c
240
yarp/yarp.c
@@ -11950,95 +11950,115 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|||||||
return (yp_node_t *) node;
|
return (yp_node_t *) node;
|
||||||
}
|
}
|
||||||
case YP_TOKEN_STRING_BEGIN: {
|
case YP_TOKEN_STRING_BEGIN: {
|
||||||
assert(parser->lex_modes.current->mode == YP_LEX_STRING);
|
yp_node_t *result = NULL;
|
||||||
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
|
|
||||||
|
|
||||||
yp_token_t opening = parser->current;
|
while (match_type_p(parser, YP_TOKEN_STRING_BEGIN)) {
|
||||||
parser_lex(parser);
|
assert(parser->lex_modes.current->mode == YP_LEX_STRING);
|
||||||
|
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
|
||||||
|
|
||||||
yp_node_t *node;
|
yp_node_t *node = NULL;
|
||||||
|
yp_token_t opening = parser->current;
|
||||||
if (accept(parser, YP_TOKEN_STRING_END)) {
|
|
||||||
// If we get here, then we have an end immediately after a start. In
|
|
||||||
// that case we'll create an empty content token and return an
|
|
||||||
// uninterpolated string.
|
|
||||||
yp_token_t content = (yp_token_t) {
|
|
||||||
.type = YP_TOKEN_STRING_CONTENT,
|
|
||||||
.start = parser->previous.start,
|
|
||||||
.end = parser->previous.start
|
|
||||||
};
|
|
||||||
|
|
||||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
|
|
||||||
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
|
||||||
// If we get here, then we have an end of a label immediately after a
|
|
||||||
// start. In that case we'll create an empty symbol node.
|
|
||||||
yp_token_t opening = not_provided(parser);
|
|
||||||
yp_token_t content = (yp_token_t) {
|
|
||||||
.type = YP_TOKEN_STRING_CONTENT,
|
|
||||||
.start = parser->previous.start,
|
|
||||||
.end = parser->previous.start
|
|
||||||
};
|
|
||||||
|
|
||||||
return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
|
|
||||||
} else if (!lex_interpolation) {
|
|
||||||
// If we don't accept interpolation then we expect the string to start
|
|
||||||
// with a single string content node.
|
|
||||||
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
|
|
||||||
yp_token_t content = parser->previous;
|
|
||||||
|
|
||||||
// It is unfortunately possible to have multiple string content nodes in
|
|
||||||
// a row in the case that there's heredoc content in the middle of the
|
|
||||||
// string, like this cursed example:
|
|
||||||
//
|
|
||||||
// <<-END+'b
|
|
||||||
// a
|
|
||||||
// END
|
|
||||||
// c'+'d'
|
|
||||||
//
|
|
||||||
// In that case we need to switch to an interpolated string to be able
|
|
||||||
// to contain all of the parts.
|
|
||||||
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
|
|
||||||
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
|
||||||
|
|
||||||
yp_token_t delimiters = not_provided(parser);
|
|
||||||
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
|
|
||||||
yp_node_list_append(&parts, part);
|
|
||||||
|
|
||||||
while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
|
|
||||||
part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
|
|
||||||
yp_node_list_append(&parts, part);
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
|
|
||||||
return (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
|
||||||
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
|
|
||||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
|
|
||||||
} else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
|
|
||||||
// In this case we've hit string content so we know the string at
|
|
||||||
// least has something in it. We'll need to check if the following
|
|
||||||
// token is the end (in which case we can return a plain string) or if
|
|
||||||
// it's not then it has interpolation.
|
|
||||||
yp_token_t content = parser->current;
|
|
||||||
parser_lex(parser);
|
parser_lex(parser);
|
||||||
|
|
||||||
if (accept(parser, YP_TOKEN_STRING_END)) {
|
if (accept(parser, YP_TOKEN_STRING_END)) {
|
||||||
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
// If we get here, then we have an end immediately after a
|
||||||
|
// start. In that case we'll create an empty content token
|
||||||
|
// and return an uninterpolated string.
|
||||||
|
yp_token_t content = (yp_token_t) {
|
||||||
|
.type = YP_TOKEN_STRING_CONTENT,
|
||||||
|
.start = parser->previous.start,
|
||||||
|
.end = parser->previous.start
|
||||||
|
};
|
||||||
|
|
||||||
|
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
|
||||||
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||||
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
// If we get here, then we have an end of a label
|
||||||
|
// immediately after a start. In that case we'll create an
|
||||||
|
// empty symbol node.
|
||||||
|
yp_token_t opening = not_provided(parser);
|
||||||
|
yp_token_t content = (yp_token_t) {
|
||||||
|
.type = YP_TOKEN_STRING_CONTENT,
|
||||||
|
.start = parser->previous.start,
|
||||||
|
.end = parser->previous.start
|
||||||
|
};
|
||||||
|
|
||||||
|
node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
|
||||||
|
} else if (!lex_interpolation) {
|
||||||
|
// If we don't accept interpolation then we expect the
|
||||||
|
// string to start with a single string content node.
|
||||||
|
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
|
||||||
|
yp_token_t content = parser->previous;
|
||||||
|
|
||||||
|
// It is unfortunately possible to have multiple string
|
||||||
|
// content nodes in a row in the case that there's heredoc
|
||||||
|
// content in the middle of the string, like this cursed
|
||||||
|
// example:
|
||||||
|
//
|
||||||
|
// <<-END+'b
|
||||||
|
// a
|
||||||
|
// END
|
||||||
|
// c'+'d'
|
||||||
|
//
|
||||||
|
// In that case we need to switch to an interpolated string
|
||||||
|
// to be able to contain all of the parts.
|
||||||
|
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||||
|
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
||||||
|
|
||||||
|
yp_token_t delimiters = not_provided(parser);
|
||||||
|
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
|
||||||
|
yp_node_list_append(&parts, part);
|
||||||
|
|
||||||
|
while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||||
|
part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
|
||||||
|
yp_node_list_append(&parts, part);
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
|
||||||
|
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||||
|
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||||
|
node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||||
|
} else {
|
||||||
|
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
|
||||||
|
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
|
||||||
|
}
|
||||||
|
} else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
|
||||||
|
// In this case we've hit string content so we know the string at
|
||||||
|
// least has something in it. We'll need to check if the following
|
||||||
|
// token is the end (in which case we can return a plain string) or if
|
||||||
|
// it's not then it has interpolation.
|
||||||
|
yp_token_t content = parser->current;
|
||||||
|
parser_lex(parser);
|
||||||
|
|
||||||
|
if (accept(parser, YP_TOKEN_STRING_END)) {
|
||||||
|
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||||
|
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||||
|
node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
|
||||||
|
} else {
|
||||||
|
// If we get here, then we have interpolation so we'll need to create
|
||||||
|
// a string or symbol node with interpolation.
|
||||||
|
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
||||||
|
yp_token_t string_opening = not_provided(parser);
|
||||||
|
yp_token_t string_closing = not_provided(parser);
|
||||||
|
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
|
||||||
|
yp_node_list_append(&parts, part);
|
||||||
|
|
||||||
|
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
|
||||||
|
yp_node_t *part = parse_string_part(parser);
|
||||||
|
if (part != NULL) yp_node_list_append(&parts, part);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||||
|
node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
||||||
|
} else {
|
||||||
|
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
|
||||||
|
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// If we get here, then we have interpolation so we'll need to create
|
// If we get here, then the first part of the string is not plain string
|
||||||
// a string or symbol node with interpolation.
|
// content, in which case we need to parse the string as an interpolated
|
||||||
|
// string.
|
||||||
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
||||||
yp_token_t string_opening = not_provided(parser);
|
|
||||||
yp_token_t string_closing = not_provided(parser);
|
|
||||||
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
|
|
||||||
yp_node_list_append(&parts, part);
|
|
||||||
|
|
||||||
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
|
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
|
||||||
yp_node_t *part = parse_string_part(parser);
|
yp_node_t *part = parse_string_part(parser);
|
||||||
@@ -12046,43 +12066,39 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
||||||
return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
||||||
|
} else {
|
||||||
|
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
|
||||||
|
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result == NULL) {
|
||||||
|
// If the node we just parsed is a symbol node, then we
|
||||||
|
// can't concatenate it with anything else, so we can now
|
||||||
|
// return that node.
|
||||||
|
if (YP_NODE_TYPE_P(node, YP_NODE_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_SYMBOL_NODE)) {
|
||||||
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
|
// If we don't already have a node, then it's fine and we
|
||||||
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
// can just set the result to be the node we just parsed.
|
||||||
}
|
result = node;
|
||||||
} else {
|
} else {
|
||||||
// If we get here, then the first part of the string is not plain string
|
// Otherwise we need to check the type of the node we just
|
||||||
// content, in which case we need to parse the string as an interpolated
|
// parsed. If it cannot be concatenated with the previous
|
||||||
// string.
|
// node, then we'll need to add a syntax error.
|
||||||
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
|
if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_STRING_NODE)) {
|
||||||
|
yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Unexpected string concatenation.");
|
||||||
|
}
|
||||||
|
|
||||||
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
|
// Either way we will create a concat node to hold the
|
||||||
yp_node_t *part = parse_string_part(parser);
|
// strings together.
|
||||||
if (part != NULL) yp_node_list_append(&parts, part);
|
result = (yp_node_t *) yp_string_concat_node_create(parser, result, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (accept(parser, YP_TOKEN_LABEL_END)) {
|
|
||||||
return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
|
|
||||||
}
|
|
||||||
|
|
||||||
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
|
|
||||||
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there's a string immediately following this string, then it's a
|
return result;
|
||||||
// concatenatation. In this case we'll parse the next string and create a
|
|
||||||
// node in the tree that concatenates the two strings.
|
|
||||||
if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
|
|
||||||
return (yp_node_t *) yp_string_concat_node_create(
|
|
||||||
parser,
|
|
||||||
node,
|
|
||||||
parse_expression(parser, YP_BINDING_POWER_CALL, "Expected string on the right side of concatenation.")
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
return node;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
case YP_TOKEN_SYMBOL_BEGIN: {
|
case YP_TOKEN_SYMBOL_BEGIN: {
|
||||||
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
|
yp_lex_mode_t lex_mode = *parser->lex_modes.current;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user