[ruby/yarp] fix: handling escaped whitespace in a %w list
Introduces a new flavor of unescaping, YP_UNESCAPE_WHITESPACE, which is the same as MINIMAL but also unescapes whitespace. Note that the spanning_heredoc.txt fixture test is updated to be closer to correct, but YARP's behavior doesn't yet fully match Ruby in this case. Fixes https://github.com/ruby/yarp/pull/1505 (https://github.com/ruby/yarp/commit/0af69bdeb1)
This commit is contained in:
parent
ffc1fc7a6d
commit
0cda3ac454
@ -179,7 +179,7 @@
|
|||||||
│ │ │ │ ├── opening_loc: ∅
|
│ │ │ │ ├── opening_loc: ∅
|
||||||
│ │ │ │ ├── content_loc: (532...535) = "j\\\n"
|
│ │ │ │ ├── content_loc: (532...535) = "j\\\n"
|
||||||
│ │ │ │ ├── closing_loc: ∅
|
│ │ │ │ ├── closing_loc: ∅
|
||||||
│ │ │ │ └── unescaped: "j\\\n"
|
│ │ │ │ └── unescaped: "j\n"
|
||||||
│ │ │ └── @ StringNode (location: (539...540))
|
│ │ │ └── @ StringNode (location: (539...540))
|
||||||
│ │ │ ├── flags: ∅
|
│ │ │ ├── flags: ∅
|
||||||
│ │ │ ├── opening_loc: ∅
|
│ │ │ ├── opening_loc: ∅
|
||||||
|
@ -292,7 +292,7 @@
|
|||||||
│ │ │ ├── opening_loc: ∅
|
│ │ │ ├── opening_loc: ∅
|
||||||
│ │ │ ├── content_loc: (290...298) = "foo\\ bar"
|
│ │ │ ├── content_loc: (290...298) = "foo\\ bar"
|
||||||
│ │ │ ├── closing_loc: ∅
|
│ │ │ ├── closing_loc: ∅
|
||||||
│ │ │ └── unescaped: "foo\\ bar"
|
│ │ │ └── unescaped: "foo bar"
|
||||||
│ │ └── @ StringNode (location: (299...304))
|
│ │ └── @ StringNode (location: (299...304))
|
||||||
│ │ ├── flags: ∅
|
│ │ ├── flags: ∅
|
||||||
│ │ ├── opening_loc: ∅
|
│ │ ├── opening_loc: ∅
|
||||||
@ -308,7 +308,7 @@
|
|||||||
│ │ │ ├── opening_loc: ∅
|
│ │ │ ├── opening_loc: ∅
|
||||||
│ │ │ ├── content_loc: (310...318) = "foo\\ bar"
|
│ │ │ ├── content_loc: (310...318) = "foo\\ bar"
|
||||||
│ │ │ ├── closing_loc: ∅
|
│ │ │ ├── closing_loc: ∅
|
||||||
│ │ │ └── unescaped: "foo\\ bar"
|
│ │ │ └── unescaped: "foo bar"
|
||||||
│ │ └── @ StringNode (location: (319...322))
|
│ │ └── @ StringNode (location: (319...322))
|
||||||
│ │ ├── flags: ∅
|
│ │ ├── flags: ∅
|
||||||
│ │ ├── opening_loc: ∅
|
│ │ ├── opening_loc: ∅
|
||||||
|
@ -67,7 +67,7 @@
|
|||||||
│ │ ├── opening_loc: ∅
|
│ │ ├── opening_loc: ∅
|
||||||
│ │ ├── content_loc: (81...85) = "a\\\nb"
|
│ │ ├── content_loc: (81...85) = "a\\\nb"
|
||||||
│ │ ├── closing_loc: ∅
|
│ │ ├── closing_loc: ∅
|
||||||
│ │ └── unescaped: "a\\\nb"
|
│ │ └── unescaped: "a\nb"
|
||||||
│ ├── opening_loc: (78...81) = "%w{"
|
│ ├── opening_loc: (78...81) = "%w{"
|
||||||
│ └── closing_loc: (85...86) = "}"
|
│ └── closing_loc: (85...86) = "}"
|
||||||
├── @ XStringNode (location: (88...96))
|
├── @ XStringNode (location: (88...96))
|
||||||
|
@ -136,6 +136,13 @@ module YARP
|
|||||||
assert_unescape_all("g", "\\g")
|
assert_unescape_all("g", "\\g")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Checks Debug.unescape_whitespace: a backslash followed by a space, a tab,
# or a newline is replaced by that bare whitespace character.
def test_whitespace_escaping_string_list
|
||||||
|
assert_equal("a b", Debug.unescape_whitespace("a\\ b"))
|
||||||
|
assert_equal("a\tb", Debug.unescape_whitespace("a\\\tb"))
|
||||||
|
assert_equal("a\nb", Debug.unescape_whitespace("a\\\nb"))
|
||||||
|
# A backslash followed by CRLF is expected to yield a single "\n".
assert_equal("a\nb", Debug.unescape_whitespace("a\\\r\nb"))
|
||||||
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def unescape_all(source)
|
def unescape_all(source)
|
||||||
|
@ -491,6 +491,12 @@ unescape_minimal(VALUE self, VALUE source) {
|
|||||||
return unescape(source, YP_UNESCAPE_MINIMAL);
|
return unescape(source, YP_UNESCAPE_MINIMAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unescape the given string minimally, additionally unescaping escaped
// whitespace (the YP_UNESCAPE_WHITESPACE mode). Returns the unescaped string.
// The self argument is not used.
|
||||||
|
static VALUE
|
||||||
|
unescape_whitespace(VALUE self, VALUE source) {
|
||||||
|
return unescape(source, YP_UNESCAPE_WHITESPACE);
|
||||||
|
}
|
||||||
|
|
||||||
// Unescape everything in the given string. Return the unescaped string.
|
// Unescape everything in the given string. Return the unescaped string.
|
||||||
static VALUE
|
static VALUE
|
||||||
unescape_all(VALUE self, VALUE source) {
|
unescape_all(VALUE self, VALUE source) {
|
||||||
@ -608,6 +614,7 @@ Init_yarp(void) {
|
|||||||
rb_define_singleton_method(rb_cYARPDebug, "named_captures", named_captures, 1);
|
rb_define_singleton_method(rb_cYARPDebug, "named_captures", named_captures, 1);
|
||||||
rb_define_singleton_method(rb_cYARPDebug, "unescape_none", unescape_none, 1);
|
rb_define_singleton_method(rb_cYARPDebug, "unescape_none", unescape_none, 1);
|
||||||
rb_define_singleton_method(rb_cYARPDebug, "unescape_minimal", unescape_minimal, 1);
|
rb_define_singleton_method(rb_cYARPDebug, "unescape_minimal", unescape_minimal, 1);
|
||||||
|
rb_define_singleton_method(rb_cYARPDebug, "unescape_whitespace", unescape_whitespace, 1);
|
||||||
rb_define_singleton_method(rb_cYARPDebug, "unescape_all", unescape_all, 1);
|
rb_define_singleton_method(rb_cYARPDebug, "unescape_all", unescape_all, 1);
|
||||||
rb_define_singleton_method(rb_cYARPDebug, "memsize", memsize, 1);
|
rb_define_singleton_method(rb_cYARPDebug, "memsize", memsize, 1);
|
||||||
rb_define_singleton_method(rb_cYARPDebug, "profile_file", profile_file, 1);
|
rb_define_singleton_method(rb_cYARPDebug, "profile_file", profile_file, 1);
|
||||||
|
@ -509,7 +509,17 @@ yp_unescape_manipulate_string_or_char_literal(yp_parser_t *parser, yp_string_t *
|
|||||||
cursor = backslash + 2;
|
cursor = backslash + 2;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (unescape_type == YP_UNESCAPE_MINIMAL) {
|
if (unescape_type == YP_UNESCAPE_WHITESPACE) {
|
||||||
|
if (backslash[1] == '\r' && backslash[2] == '\n') {
|
||||||
|
cursor = backslash + 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (yp_strspn_whitespace(backslash + 1, 1)) {
|
||||||
|
cursor = backslash + 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (unescape_type == YP_UNESCAPE_WHITESPACE || unescape_type == YP_UNESCAPE_MINIMAL) {
|
||||||
// In this case we're escaping something that doesn't need escaping.
|
// In this case we're escaping something that doesn't need escaping.
|
||||||
dest[dest_length++] = '\\';
|
dest[dest_length++] = '\\';
|
||||||
cursor = backslash + 1;
|
cursor = backslash + 1;
|
||||||
@ -579,7 +589,16 @@ yp_unescape_calculate_difference(yp_parser_t *parser, const uint8_t *backslash,
|
|||||||
case '\'':
|
case '\'':
|
||||||
return 2;
|
return 2;
|
||||||
default: {
|
default: {
|
||||||
if (unescape_type == YP_UNESCAPE_MINIMAL) {
|
if (unescape_type == YP_UNESCAPE_WHITESPACE) {
|
||||||
|
if (backslash[1] == '\r' && backslash[2] == '\n') {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
size_t whitespace = yp_strspn_whitespace(backslash + 1, 1);
|
||||||
|
if (whitespace > 0) {
|
||||||
|
return whitespace;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (unescape_type == YP_UNESCAPE_WHITESPACE || unescape_type == YP_UNESCAPE_MINIMAL) {
|
||||||
return 1 + yp_char_width(parser, backslash + 1, parser->end);
|
return 1 + yp_char_width(parser, backslash + 1, parser->end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,9 +24,13 @@ typedef enum {
|
|||||||
// single quotes and backslashes.
|
// single quotes and backslashes.
|
||||||
YP_UNESCAPE_MINIMAL,
|
YP_UNESCAPE_MINIMAL,
|
||||||
|
|
||||||
|
// When we're unescaping a string list, in addition to MINIMAL, we need to
|
||||||
|
// unescape whitespace.
|
||||||
|
YP_UNESCAPE_WHITESPACE,
|
||||||
|
|
||||||
// When we're unescaping a double-quoted string, we need to unescape all
|
// When we're unescaping a double-quoted string, we need to unescape all
|
||||||
// escapes.
|
// escapes.
|
||||||
YP_UNESCAPE_ALL
|
YP_UNESCAPE_ALL,
|
||||||
} yp_unescape_type_t;
|
} yp_unescape_type_t;
|
||||||
|
|
||||||
// Unescape the contents of the given token into the given string using the given unescape mode.
|
// Unescape the contents of the given token into the given string using the given unescape mode.
|
||||||
|
@ -12987,7 +12987,7 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
|
|||||||
|
|
||||||
yp_token_t opening = not_provided(parser);
|
yp_token_t opening = not_provided(parser);
|
||||||
yp_token_t closing = not_provided(parser);
|
yp_token_t closing = not_provided(parser);
|
||||||
yp_node_t *string = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_MINIMAL);
|
yp_node_t *string = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_WHITESPACE);
|
||||||
yp_array_node_elements_append(array, string);
|
yp_array_node_elements_append(array, string);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user