[ruby/prism] Fix percent delimiter strings with crlfs
parse.y treats CRLF as LF and effectively "normalizes" it before parsing. That means a string like `%\nfoo\r\n` is actually treated as `%\nfoo\n` for the purposes of parsing. This happens on the opening side of the percent string as well as on the closing side, so for example `%\r\nfoo\n` must be treated as `%\nfoo\n`.

To handle this in Prism, when we start a % string we check whether its delimiter is `\r\n`, and if so we consider the terminator to be `\n`. Then, as we lex the string, we check for `\r\n` and treat it as `\n`, but only when the string was opened with a newline (so the terminator is `\n`).

Fixes: #3230
[Bug #20938]
https://github.com/ruby/prism/commit/e573ceaad6

Co-authored-by: John Hawthorn <jhawthorn@github.com>
Co-authored-by: eileencodes <eileencodes@gmail.com>
Co-authored-by: Kevin Newton <kddnewton@gmail.com>
This commit is contained in:
parent
d53e4545f4
commit
9fe6fd8693
@ -10503,6 +10503,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
|
||||
}
|
||||
|
||||
const uint8_t *end = parser->current.end - 1;
|
||||
assert(end >= start);
|
||||
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
|
||||
|
||||
token_buffer->cursor = end;
|
||||
@ -10583,9 +10584,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
|
||||
pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
|
||||
}
|
||||
|
||||
const uint8_t delimiter = *parser->current.end;
|
||||
parser->current.end += eol_length;
|
||||
uint8_t delimiter = *parser->current.end;
|
||||
|
||||
// If our delimiter is \r\n, we want to treat it as if it's \n.
|
||||
// For example, %\r\nfoo\r\n should be "foo"
|
||||
if (eol_length == 2) {
|
||||
delimiter = *(parser->current.end + 1);
|
||||
}
|
||||
|
||||
parser->current.end += eol_length;
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
@ -12335,10 +12342,28 @@ parser_lex(pm_parser_t *parser) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_terminator = (*breakpoint == lex_mode->as.string.terminator);
|
||||
|
||||
// If the terminator is newline, we need to consider \r\n _also_ a newline
|
||||
// For example: `%\nfoo\r\n`
|
||||
// The string should be "foo", not "foo\r"
|
||||
if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
|
||||
if (lex_mode->as.string.terminator == '\n') {
|
||||
is_terminator = true;
|
||||
}
|
||||
|
||||
// If the terminator is a CR, but we see a CRLF, we need to
|
||||
// treat the CRLF as a newline, meaning this is _not_ the
|
||||
// terminator
|
||||
if (lex_mode->as.string.terminator == '\r') {
|
||||
is_terminator = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Note that we have to check the terminator here first because we could
|
||||
// potentially be parsing a % string that has a # character as the
|
||||
// terminator.
|
||||
if (*breakpoint == lex_mode->as.string.terminator) {
|
||||
if (is_terminator) {
|
||||
// If this terminator doesn't actually close the string, then we need
|
||||
// to continue on past it.
|
||||
if (lex_mode->as.string.nesting > 0) {
|
||||
|
58
test/prism/percent_delimiter_string_test.rb
Normal file
58
test/prism/percent_delimiter_string_test.rb
Normal file
@ -0,0 +1,58 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require_relative "test_helper"
|
||||
|
||||
module Prism
  # Exercises %-literals whose delimiter is a line ending (LF, CRLF, or a bare
  # CR). Each test parses a source snippet and checks the unescaped content of
  # the first string node found in the resulting tree.
  class PercentDelimiterStringTest < TestCase
    # Opened with LF, closed with CRLF: the CR belongs to the terminator and
    # must not end up in the string content.
    def test_newline_terminator_with_lf_crlf
      str = "%\n123456\r\n"
      assert_parse "123456", str
    end

    # Opened with LF, closed with CR CRLF: only the final CRLF pair is the
    # terminator, so the lone CR before it stays in the content.
    def test_newline_terminator_with_lf_crlf_with_extra_cr
      str = "%\n123456\r\r\n"
      assert_parse "123456\r", str
    end

    # Opened with CRLF, closed with CRLF.
    def test_newline_terminator_with_crlf_pair
      str = "%\r\n123456\r\n"
      assert_parse "123456", str
    end

    # Opened with CRLF, closed with CR CRLF: the lone CR stays in the content.
    def test_newline_terminator_with_crlf_crlf_with_extra_cr
      str = "%\r\n123456\r\r\n"
      assert_parse "123456\r", str
    end

    # Opened with a bare CR and closed by a bare CR (the closing CR is followed
    # by `;`, so it is not the first half of a CRLF).
    def test_newline_terminator_with_cr_cr
      str = "%\r123456\r;\n"
      assert_parse "123456", str
    end

    # Opened with CRLF, closed by a bare LF.
    def test_newline_terminator_with_crlf_lf
      str = "%\r\n123456\n;\n"
      assert_parse "123456", str
    end

    # Opened with a bare CR: a CRLF inside the body is not the terminator and
    # shows up as "\n" in the unescaped value; the final bare CR closes the
    # string.
    def test_cr_crlf
      str = "%\r1\r\n \r"
      assert_parse "1\n ", str
    end

    # Opened with LF: the first CRLF closes the string, so the trailing " \n"
    # is not part of it.
    def test_lf_crlf
      str = "%\n1\r\n \n"
      assert_parse "1", str
    end

    # Opened with LF: the first LF closes the string.
    def test_lf_lf
      str = "%\n1\n \n"
      assert_parse "1", str
    end

    private

    # Parses +str+ and asserts that the first StringNode found (breadth-first)
    # has +expected+ as its unescaped content.
    #
    # Fails with a descriptive message, rather than raising NoMethodError on
    # nil, when the parse result contains no StringNode at all.
    def assert_parse(expected, str)
      tree = Prism.parse str
      node = tree.value.breadth_first_search { |candidate| candidate.is_a?(Prism::StringNode) }

      assert node, "expected #{str.inspect} to parse to a tree containing a StringNode"
      assert_equal expected, node.unescaped
    end
  end
end
|
Loading…
x
Reference in New Issue
Block a user