[ruby/yarp] fix: support newline-terminated regular expressions
Previously, parsing a snippet like `%r\nfoo\n` would track the second newline twice, causing a runtime assertion to fail. Fixing that issue revealed another bug: the _first_ newline was not being tracked at all. So we introduce a call to yp_newline_list_append right when we construct the REGEXP_BEGIN token. https://github.com/ruby/yarp/commit/0d5d759091
This commit is contained in:
parent
f83c1d62bd
commit
ac819f4db9
6
test/snapshots/newline-terminated-things.txt
Normal file
6
test/snapshots/newline-terminated-things.txt
Normal file
@ -0,0 +1,6 @@
|
||||
ProgramNode(0...7)(
|
||||
[],
|
||||
StatementsNode(0...7)(
|
||||
[RegularExpressionNode(0...7)((0...3), (3...6), (6...7), "foo", 0)]
|
||||
)
|
||||
)
|
2
test/yarp/fixtures/newline-terminated-things.txt
Normal file
2
test/yarp/fixtures/newline-terminated-things.txt
Normal file
@ -0,0 +1,2 @@
|
||||
%r
|
||||
foo
|
@ -120,6 +120,8 @@ class ParseTest < Test::Unit::TestCase
|
||||
end
|
||||
|
||||
Dir["*.txt", base: base].each do |relative|
|
||||
next if relative == "newline_terminated.txt"
|
||||
|
||||
# We test every snippet (separated by \n\n) in isolation
|
||||
# to ensure the parser does not try to read bytes further than the end of each snippet
|
||||
define_method "test_individual_snippets_#{relative}" do
|
||||
|
11
yarp/yarp.c
11
yarp/yarp.c
@ -6215,6 +6215,9 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
if (parser->current.end < parser->end) {
|
||||
lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
|
||||
if (parser->current.end == '\n') {
|
||||
yp_newline_list_append(&parser->newline_list, parser->current.end);
|
||||
}
|
||||
parser->current.end++;
|
||||
}
|
||||
|
||||
@ -6526,7 +6529,13 @@ parser_lex(yp_parser_t *parser) {
|
||||
// If we've hit a newline, then we need to track that in the
|
||||
// list of newlines.
|
||||
if (*breakpoint == '\n') {
|
||||
yp_newline_list_append(&parser->newline_list, breakpoint);
|
||||
// For the special case of a newline-terminated regular expression, we will pass
|
||||
// through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again
|
||||
// with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by
|
||||
// tracking it only in the REGEXP_BEGIN case.
|
||||
if (!(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN)) {
|
||||
yp_newline_list_append(&parser->newline_list, breakpoint);
|
||||
}
|
||||
|
||||
if (lex_mode->as.regexp.terminator != '\n') {
|
||||
// If the terminator is not a newline, then we can set
|
||||
|
Loading…
x
Reference in New Issue
Block a user