[ruby/yarp] Fix string concat parsing

https://github.com/ruby/yarp/commit/58f839a3eb
This commit is contained in:
Kevin Newton 2023-08-07 16:55:02 -04:00 committed by Takashi Kokubun
parent 77e52735f0
commit ee885237f6
Notes: git 2023-08-17 00:48:02 +00:00
2 changed files with 133 additions and 117 deletions

View File

@ -198,11 +198,11 @@ ProgramNode(0...608)(
) )
), ),
StringConcatNode(552...560)( StringConcatNode(552...560)(
StringNode(552...554)((552...553), (553...553), (553...554), ""), StringConcatNode(552...557)(
StringConcatNode(555...560)( StringNode(552...554)((552...553), (553...553), (553...554), ""),
StringNode(555...557)((555...556), (556...556), (556...557), ""), StringNode(555...557)((555...556), (556...556), (556...557), "")
StringNode(558...560)((558...559), (559...559), (559...560), "") ),
) StringNode(558...560)((558...559), (559...559), (559...560), "")
), ),
StringConcatNode(562...574)( StringConcatNode(562...574)(
InterpolatedStringNode(562...570)( InterpolatedStringNode(562...570)(

View File

@ -11950,95 +11950,115 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
return (yp_node_t *) node; return (yp_node_t *) node;
} }
case YP_TOKEN_STRING_BEGIN: { case YP_TOKEN_STRING_BEGIN: {
assert(parser->lex_modes.current->mode == YP_LEX_STRING); yp_node_t *result = NULL;
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
yp_token_t opening = parser->current; while (match_type_p(parser, YP_TOKEN_STRING_BEGIN)) {
parser_lex(parser); assert(parser->lex_modes.current->mode == YP_LEX_STRING);
bool lex_interpolation = parser->lex_modes.current->as.string.interpolation;
yp_node_t *node; yp_node_t *node = NULL;
yp_token_t opening = parser->current;
if (accept(parser, YP_TOKEN_STRING_END)) {
// If we get here, then we have an end immediately after a start. In
// that case we'll create an empty content token and return an
// uninterpolated string.
yp_token_t content = (yp_token_t) {
.type = YP_TOKEN_STRING_CONTENT,
.start = parser->previous.start,
.end = parser->previous.start
};
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
// If we get here, then we have an end of a label immediately after a
// start. In that case we'll create an empty symbol node.
yp_token_t opening = not_provided(parser);
yp_token_t content = (yp_token_t) {
.type = YP_TOKEN_STRING_CONTENT,
.start = parser->previous.start,
.end = parser->previous.start
};
return (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
} else if (!lex_interpolation) {
// If we don't accept interpolation then we expect the string to start
// with a single string content node.
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
yp_token_t content = parser->previous;
// It is unfortunately possible to have multiple string content nodes in
// a row in the case that there's heredoc content in the middle of the
// string, like this cursed example:
//
// <<-END+'b
// a
// END
// c'+'d'
//
// In that case we need to switch to an interpolated string to be able
// to contain all of the parts.
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
yp_token_t delimiters = not_provided(parser);
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
yp_node_list_append(&parts, part);
while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
yp_node_list_append(&parts, part);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
return (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
}
if (accept(parser, YP_TOKEN_LABEL_END)) {
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
} else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the string at
// least has something in it. We'll need to check if the following
// token is the end (in which case we can return a plain string) or if
// it's not then it has interpolation.
yp_token_t content = parser->current;
parser_lex(parser); parser_lex(parser);
if (accept(parser, YP_TOKEN_STRING_END)) { if (accept(parser, YP_TOKEN_STRING_END)) {
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); // If we get here, then we have an end immediately after a
// start. In that case we'll create an empty content token
// and return an uninterpolated string.
yp_token_t content = (yp_token_t) {
.type = YP_TOKEN_STRING_CONTENT,
.start = parser->previous.start,
.end = parser->previous.start
};
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE);
} else if (accept(parser, YP_TOKEN_LABEL_END)) { } else if (accept(parser, YP_TOKEN_LABEL_END)) {
return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); // If we get here, then we have an end of a label
// immediately after a start. In that case we'll create an
// empty symbol node.
yp_token_t opening = not_provided(parser);
yp_token_t content = (yp_token_t) {
.type = YP_TOKEN_STRING_CONTENT,
.start = parser->previous.start,
.end = parser->previous.start
};
node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous);
} else if (!lex_interpolation) {
// If we don't accept interpolation then we expect the
// string to start with a single string content node.
expect(parser, YP_TOKEN_STRING_CONTENT, "Expected string content after opening delimiter.");
yp_token_t content = parser->previous;
// It is unfortunately possible to have multiple string
// content nodes in a row in the case that there's heredoc
// content in the middle of the string, like this cursed
// example:
//
// <<-END+'b
// a
// END
// c'+'d'
//
// In that case we need to switch to an interpolated string
// to be able to contain all of the parts.
if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
yp_token_t delimiters = not_provided(parser);
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL);
yp_node_list_append(&parts, part);
while (accept(parser, YP_TOKEN_STRING_CONTENT)) {
part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL);
yp_node_list_append(&parts, part);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
} else {
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for a string literal.");
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL);
}
} else if (match_type_p(parser, YP_TOKEN_STRING_CONTENT)) {
// In this case we've hit string content so we know the string at
// least has something in it. We'll need to check if the following
// token is the end (in which case we can return a plain string) or if
// it's not then it has interpolation.
yp_token_t content = parser->current;
parser_lex(parser);
if (accept(parser, YP_TOKEN_STRING_END)) {
node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
} else if (accept(parser, YP_TOKEN_LABEL_END)) {
node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL);
} else {
// If we get here, then we have interpolation so we'll need to create
// a string or symbol node with interpolation.
yp_node_list_t parts = YP_EMPTY_NODE_LIST;
yp_token_t string_opening = not_provided(parser);
yp_token_t string_closing = not_provided(parser);
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
yp_node_list_append(&parts, part);
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
yp_node_t *part = parse_string_part(parser);
if (part != NULL) yp_node_list_append(&parts, part);
}
if (accept(parser, YP_TOKEN_LABEL_END)) {
node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
} else {
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
}
}
} else { } else {
// If we get here, then we have interpolation so we'll need to create // If we get here, then the first part of the string is not plain string
// a string or symbol node with interpolation. // content, in which case we need to parse the string as an interpolated
// string.
yp_node_list_t parts = YP_EMPTY_NODE_LIST; yp_node_list_t parts = YP_EMPTY_NODE_LIST;
yp_token_t string_opening = not_provided(parser);
yp_token_t string_closing = not_provided(parser);
yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL);
yp_node_list_append(&parts, part);
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) { while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) {
yp_node_t *part = parse_string_part(parser); yp_node_t *part = parse_string_part(parser);
@ -12046,43 +12066,39 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
} }
if (accept(parser, YP_TOKEN_LABEL_END)) { if (accept(parser, YP_TOKEN_LABEL_END)) {
return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
} else {
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
}
}
if (result == NULL) {
// If the node we just parsed is a symbol node, then we
// can't concatenate it with anything else, so we can now
// return that node.
if (YP_NODE_TYPE_P(node, YP_NODE_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_SYMBOL_NODE)) {
return node;
} }
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string."); // If we don't already have a node, then it's fine and we
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); // can just set the result to be the node we just parsed.
} result = node;
} else { } else {
// If we get here, then the first part of the string is not plain string // Otherwise we need to check the type of the node we just
// content, in which case we need to parse the string as an interpolated // parsed. If it cannot be concatenated with the previous
// string. // node, then we'll need to add a syntax error.
yp_node_list_t parts = YP_EMPTY_NODE_LIST; if (!YP_NODE_TYPE_P(node, YP_NODE_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_NODE_INTERPOLATED_STRING_NODE)) {
yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, "Unexpected string concatenation.");
}
while (!match_any_type_p(parser, 3, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) { // Either way we will create a concat node to hold the
yp_node_t *part = parse_string_part(parser); // strings together.
if (part != NULL) yp_node_list_append(&parts, part); result = (yp_node_t *) yp_string_concat_node_create(parser, result, node);
} }
if (accept(parser, YP_TOKEN_LABEL_END)) {
return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
}
expect(parser, YP_TOKEN_STRING_END, "Expected a closing delimiter for an interpolated string.");
node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
} }
// If there's a string immediately following this string, then it's a return result;
// concatenation. In this case we'll parse the next string and create a
// node in the tree that concatenates the two strings.
if (parser->current.type == YP_TOKEN_STRING_BEGIN) {
return (yp_node_t *) yp_string_concat_node_create(
parser,
node,
parse_expression(parser, YP_BINDING_POWER_CALL, "Expected string on the right side of concatenation.")
);
} else {
return node;
}
} }
case YP_TOKEN_SYMBOL_BEGIN: { case YP_TOKEN_SYMBOL_BEGIN: {
yp_lex_mode_t lex_mode = *parser->lex_modes.current; yp_lex_mode_t lex_mode = *parser->lex_modes.current;