diff --git a/test/snapshots/newline-terminated-things.txt b/test/snapshots/newline-terminated-things.txt new file mode 100644 index 0000000000..e68ea1658e --- /dev/null +++ b/test/snapshots/newline-terminated-things.txt @@ -0,0 +1,6 @@ +ProgramNode(0...7)( + [], + StatementsNode(0...7)( + [RegularExpressionNode(0...7)((0...3), (3...6), (6...7), "foo", 0)] + ) +) diff --git a/test/yarp/fixtures/newline-terminated-things.txt b/test/yarp/fixtures/newline-terminated-things.txt new file mode 100644 index 0000000000..27e7c62e8e --- /dev/null +++ b/test/yarp/fixtures/newline-terminated-things.txt @@ -0,0 +1,2 @@ +%r +foo diff --git a/test/yarp/parse_test.rb b/test/yarp/parse_test.rb index 7a910f7db2..33eb1475f5 100644 --- a/test/yarp/parse_test.rb +++ b/test/yarp/parse_test.rb @@ -120,6 +120,8 @@ class ParseTest < Test::Unit::TestCase end Dir["*.txt", base: base].each do |relative| + next if relative == "newline_terminated.txt" + # We test every snippet (separated by \n\n) in isolation # to ensure the parser does not try to read bytes further than the end of each snippet define_method "test_individual_snippets_#{relative}" do diff --git a/yarp/yarp.c b/yarp/yarp.c index a61c9e15b8..df6bde14b3 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -6215,6 +6215,9 @@ parser_lex(yp_parser_t *parser) { if (parser->current.end < parser->end) { lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + if (*parser->current.end == '\n') { + yp_newline_list_append(&parser->newline_list, parser->current.end); + } parser->current.end++; } @@ -6526,7 +6529,13 @@ parser_lex(yp_parser_t *parser) { // If we've hit a newline, then we need to track that in the // list of newlines. 
if (*breakpoint == '\n') { - yp_newline_list_append(&parser->newline_list, breakpoint); + // For the special case of a newline-terminated regular expression, we will pass + // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again + // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by + // tracking it only in the REGEXP_BEGIN case. + if (!(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)) { + yp_newline_list_append(&parser->newline_list, breakpoint); + } if (lex_mode->as.regexp.terminator != '\n') { // If the terminator is not a newline, then we can set