diff --git a/prism/prism.c b/prism/prism.c index 56e83d02b0..a11d8ebcea 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -10503,6 +10503,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) { } const uint8_t *end = parser->current.end - 1; + assert(end >= start); pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start)); token_buffer->cursor = end; @@ -10583,9 +10584,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) { pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1); } - const uint8_t delimiter = *parser->current.end; - parser->current.end += eol_length; + uint8_t delimiter = *parser->current.end; + // If our delimiter is \r\n, we want to treat it as if it's \n. + // For example, %\r\nfoo\r\n should be "foo" + if (eol_length == 2) { + delimiter = *(parser->current.end + 1); + } + + parser->current.end += eol_length; return delimiter; } @@ -12335,10 +12342,28 @@ parser_lex(pm_parser_t *parser) { continue; } + bool is_terminator = (*breakpoint == lex_mode->as.string.terminator); + + // If the terminator is newline, we need to consider \r\n _also_ a newline + // For example: `%\nfoo\r\n` + // The string should be "foo", not "foo\r" + if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') { + if (lex_mode->as.string.terminator == '\n') { + is_terminator = true; + } + + // If the terminator is a CR, but we see a CRLF, we need to + // treat the CRLF as a newline, meaning this is _not_ the + // terminator + if (lex_mode->as.string.terminator == '\r') { + is_terminator = false; + } + } + // Note that we have to check the terminator here first because we could // potentially be parsing a % string that has a # character as the // terminator. - if (*breakpoint == lex_mode->as.string.terminator) { + if (is_terminator) { // If this terminator doesn't actually close the string, then we need // to continue on past it. 
if (lex_mode->as.string.nesting > 0) {
diff --git a/test/prism/percent_delimiter_string_test.rb b/test/prism/percent_delimiter_string_test.rb
new file mode 100644
index 0000000000..4cf5990dcf
--- /dev/null
+++ b/test/prism/percent_delimiter_string_test.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+module Prism
+  class PercentDelimiterStringTest < TestCase
+    def test_newline_terminator_with_lf_crlf
+      str = "%\n123456\r\n"
+      assert_parse "123456", str
+    end
+
+    def test_newline_terminator_with_lf_crlf_with_extra_cr
+      str = "%\n123456\r\r\n"
+      assert_parse "123456\r", str
+    end
+
+    def test_newline_terminator_with_crlf_pair
+      str = "%\r\n123456\r\n"
+      assert_parse "123456", str
+    end
+
+    def test_newline_terminator_with_crlf_crlf_with_extra_cr
+      str = "%\r\n123456\r\r\n"
+      assert_parse "123456\r", str
+    end
+
+    def test_newline_terminator_with_cr_cr
+      str = "%\r123456\r;\n"
+      assert_parse "123456", str
+    end
+
+    def test_newline_terminator_with_crlf_lf
+      str = "%\r\n123456\n;\n"
+      assert_parse "123456", str
+    end
+
+    def test_cr_crlf
+      str = "%\r1\r\n \r"
+      assert_parse "1\n ", str
+    end
+
+    def test_lf_crlf
+      str = "%\n1\r\n \n"
+      assert_parse "1", str
+    end
+
+    def test_lf_lf
+      str = "%\n1\n \n"
+      assert_parse "1", str
+    end
+
+    def assert_parse(expected, str)
+      tree = Prism.parse str
+      node = tree.value.breadth_first_search { |x| Prism::StringNode === x }
+      assert_equal expected, node.unescaped
+    end
+  end
+end