diff --git a/test/yarp/fixtures/wrapping_heredoc.txt b/test/yarp/fixtures/wrapping_heredoc.txt new file mode 100644 index 0000000000..d5fc710178 --- /dev/null +++ b/test/yarp/fixtures/wrapping_heredoc.txt @@ -0,0 +1,13 @@ +# test regex, string, and lists that wrap a heredoc thanks to an escaped newline + +# ripper incorrectly creates a "b\nc" string instead of two separate string tokens +pp <<-A.gsub(/b\ +a +A +c/, "") + +# ripper incorrectly creates a "e\nf" string instead of two separate string tokens +pp <<-A + "e\ +d +A +f" diff --git a/test/yarp/parse_test.rb b/test/yarp/parse_test.rb index c0f3ecf551..f8c1fe12d1 100644 --- a/test/yarp/parse_test.rb +++ b/test/yarp/parse_test.rb @@ -112,6 +112,10 @@ class ParseTest < Test::Unit::TestCase # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838. return if relative == "seattlerb/heredoc_nested.txt" + # Ripper seems to have a bug that the regex portions before and after the heredoc are combined + # into a single token. + return if relative == "wrapping_heredoc.txt" + # Finally, assert that we can lex the source and get the same tokens as # Ripper. lex_result = YARP.lex_compat(source) diff --git a/test/yarp/snapshots/wrapping_heredoc.txt b/test/yarp/snapshots/wrapping_heredoc.txt new file mode 100644 index 0000000000..674db56ed1 --- /dev/null +++ b/test/yarp/snapshots/wrapping_heredoc.txt @@ -0,0 +1,80 @@ +ProgramNode(165...298)( + [], + StatementsNode(165...298)( + [CallNode(165...193)( + nil, + nil, + (165...167), + nil, + ArgumentsNode(168...193)( + [CallNode(168...193)( + InterpolatedStringNode(168...172)( + (168...172), + [StringNode(182...184)(nil, (182...184), nil, "a\n")], + (184...186) + ), + (172...173), + (173...177), + (177...178), + ArgumentsNode(178...192)( + [InterpolatedRegularExpressionNode(178...188)( + (178...179), + [StringNode(179...182)(nil, (179...182), nil, "b"), + StringNode(186...187)(nil, (186...187), nil, "c")], + (187...188), + 0 + ), + StringNode(190...192)( + (190...191), + (191...191), + (191...192), + "" + )] + ), + (192...193), + nil, + 0, + "gsub" + )] + ), + nil, + nil, + 0, + "pp" + ), + CallNode(278...298)( + nil, + nil, + (278...280), + nil, + ArgumentsNode(281...298)( + [CallNode(281...298)( + InterpolatedStringNode(281...285)( + (281...285), + [StringNode(292...294)(nil, (292...294), nil, "d\n")], + (294...296) + ), + nil, + (286...287), + nil, + ArgumentsNode(288...298)( + [InterpolatedStringNode(288...298)( + (288...289), + [StringNode(289...292)(nil, (289...292), nil, "e"), + StringNode(296...297)(nil, (296...297), nil, "f")], + (297...298) + )] + ), + nil, + nil, + 0, + "+" + )] + ), + nil, + nil, + 0, + "pp" + )] + ) +) diff --git a/yarp/yarp.c b/yarp/yarp.c index a7b2290aa2..fdcc303b7f 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -6614,7 +6614,13 @@ parser_lex(yp_parser_t *parser) { case YP_LEX_REGEXP: { // First, we'll set to start of this token to be the current end. - parser->current.start = parser->current.end; + if (parser->next_start == NULL) { + parser->current.start = parser->current.end; + } else { + parser->current.start = parser->next_start; + parser->current.end = parser->next_start; + parser->next_start = NULL; + } // We'll check if we're at the end of the file. If we are, then we need to // return the EOF token. @@ -6693,9 +6699,19 @@ parser_lex(yp_parser_t *parser) { if (*breakpoint == '\\') { size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false); - // If the result is an escaped newline, then we need to - // track that newline. - yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1); + // If the result is an escaped newline ... + if (*(breakpoint + difference - 1) == '\n') { + if (parser->heredoc_end) { + // ... if we are on the same line as a heredoc, flush the heredoc and + // continue parsing after heredoc_end. + parser->current.end = breakpoint + difference; + parser_flush_heredoc_end(parser); + LEX(YP_TOKEN_STRING_CONTENT); + } else { + // ... else track the newline. + yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); + } + } breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); continue; @@ -6833,9 +6849,19 @@ parser_lex(yp_parser_t *parser) { yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL; size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false); - // If the result is an escaped newline, then we need to - // track that newline. - yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1); + // If the result is an escaped newline ... + if (*(breakpoint + difference - 1) == '\n') { + if (parser->heredoc_end) { + // ... if we are on the same line as a heredoc, flush the heredoc and + // continue parsing after heredoc_end. + parser->current.end = breakpoint + difference; + parser_flush_heredoc_end(parser); + LEX(YP_TOKEN_STRING_CONTENT); + } else { + // ... else track the newline. + yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); + } + } breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); break;