[ruby/prism] Reject invalid capture groups (keywords)
https://github.com/ruby/prism/commit/bb78d83e88
This commit is contained in:
parent
bb3cbdfe2f
commit
8780059c38
110
prism/prism.c
110
prism/prism.c
@ -1184,6 +1184,77 @@ token_is_setter_name(pm_token_t *token) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the given local variable name is spelled exactly like one of
* the Ruby keywords listed in the body below, and false otherwise.
*
* source points at the first byte of the name and length is its size in bytes.
* The bytes are compared with memcmp using length, so no null terminator is
* read from source.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
pm_local_is_keyword(const char *source, size_t length) {
|
||||||
|
// Every use of KEYWORD sits under the case whose value equals the length of
// the literal passed in, so comparing length bytes compares the whole keyword.
#define KEYWORD(name) if (memcmp(source, name, length) == 0) return true
|
||||||
|
|
||||||
|
// Dispatch on the length first and then on the first byte, so that at most
// two keyword comparisons run for any input.
switch (length) {
|
||||||
|
case 2:
|
||||||
|
switch (source[0]) {
|
||||||
|
case 'd': KEYWORD("do"); return false;
|
||||||
|
case 'i': KEYWORD("if"); KEYWORD("in"); return false;
|
||||||
|
case 'o': KEYWORD("or"); return false;
|
||||||
|
default: return false;
|
||||||
|
}
|
||||||
|
case 3:
|
||||||
|
switch (source[0]) {
|
||||||
|
case 'a': KEYWORD("and"); return false;
|
||||||
|
case 'd': KEYWORD("def"); return false;
|
||||||
|
case 'e': KEYWORD("end"); return false;
|
||||||
|
case 'f': KEYWORD("for"); return false;
|
||||||
|
case 'n': KEYWORD("nil"); KEYWORD("not"); return false;
|
||||||
|
default: return false;
|
||||||
|
}
|
||||||
|
case 4:
|
||||||
|
switch (source[0]) {
|
||||||
|
case 'c': KEYWORD("case"); return false;
|
||||||
|
case 'e': KEYWORD("else"); return false;
|
||||||
|
case 'n': KEYWORD("next"); return false;
|
||||||
|
case 'r': KEYWORD("redo"); return false;
|
||||||
|
case 's': KEYWORD("self"); return false;
|
||||||
|
case 't': KEYWORD("then"); KEYWORD("true"); return false;
|
||||||
|
case 'w': KEYWORD("when"); return false;
|
||||||
|
default: return false;
|
||||||
|
}
|
||||||
|
case 5:
|
||||||
|
switch (source[0]) {
|
||||||
|
case 'a': KEYWORD("alias"); return false;
|
||||||
|
case 'b': KEYWORD("begin"); KEYWORD("break"); return false;
|
||||||
|
case 'c': KEYWORD("class"); return false;
|
||||||
|
case 'e': KEYWORD("elsif"); return false;
|
||||||
|
case 'f': KEYWORD("false"); return false;
|
||||||
|
case 'r': KEYWORD("retry"); return false;
|
||||||
|
case 's': KEYWORD("super"); return false;
|
||||||
|
case 'u': KEYWORD("undef"); KEYWORD("until"); return false;
|
||||||
|
case 'w': KEYWORD("while"); return false;
|
||||||
|
case 'y': KEYWORD("yield"); return false;
|
||||||
|
default: return false;
|
||||||
|
}
|
||||||
|
case 6:
|
||||||
|
switch (source[0]) {
|
||||||
|
case 'e': KEYWORD("ensure"); return false;
|
||||||
|
case 'm': KEYWORD("module"); return false;
|
||||||
|
case 'r': KEYWORD("rescue"); KEYWORD("return"); return false;
|
||||||
|
case 'u': KEYWORD("unless"); return false;
|
||||||
|
default: return false;
|
||||||
|
}
|
||||||
|
case 8:
|
||||||
|
KEYWORD("__LINE__");
|
||||||
|
KEYWORD("__FILE__");
|
||||||
|
return false;
|
||||||
|
case 12:
|
||||||
|
KEYWORD("__ENCODING__");
|
||||||
|
return false;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef KEYWORD
|
||||||
|
}
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/* Node flag handling functions */
|
/* Node flag handling functions */
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
@ -10576,19 +10647,19 @@ parser_lex(pm_parser_t *parser) {
|
|||||||
|
|
||||||
pm_token_type_t type = lex_identifier(parser, previous_command_start);
|
pm_token_type_t type = lex_identifier(parser, previous_command_start);
|
||||||
|
|
||||||
// If we've hit a __END__ and it was at the start of the line or the
|
// If we've hit a __END__ and it was at the start of the
|
||||||
// start of the file and it is followed by either a \n or a \r\n, then
|
// line or the start of the file and it is followed by
|
||||||
// this is the last token of the file.
|
// either a \n or a \r\n, then this is the last token of the
|
||||||
|
// file.
|
||||||
if (
|
if (
|
||||||
((parser->current.end - parser->current.start) == 7) &&
|
((parser->current.end - parser->current.start) == 7) &&
|
||||||
current_token_starts_line(parser) &&
|
current_token_starts_line(parser) &&
|
||||||
(memcmp(parser->current.start, "__END__", 7) == 0) &&
|
(memcmp(parser->current.start, "__END__", 7) == 0) &&
|
||||||
(parser->current.end == parser->end || match_eol(parser))
|
(parser->current.end == parser->end || match_eol(parser))
|
||||||
)
|
) {
|
||||||
{
|
// Since we know we're about to add an __END__ comment,
|
||||||
// Since we know we're about to add an __END__ comment, we know we
|
// we know we need to add all of the newlines to get the
|
||||||
// need to add all of the newlines to get the correct column
|
// correct column information for it.
|
||||||
// information for it.
|
|
||||||
const uint8_t *cursor = parser->current.end;
|
const uint8_t *cursor = parser->current.end;
|
||||||
while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
|
while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
|
||||||
pm_newline_list_append(&parser->newline_list, cursor++);
|
pm_newline_list_append(&parser->newline_list, cursor++);
|
||||||
@ -18006,22 +18077,39 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the name of the capture group is a valid local variable that
|
||||||
|
* can be written to.
|
||||||
|
*/
|
||||||
static bool
|
static bool
|
||||||
name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) {
|
parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// First ensure that it starts with a valid identifier starting character.
|
||||||
size_t width = char_is_identifier_start(parser, source);
|
size_t width = char_is_identifier_start(parser, source);
|
||||||
if (!width) {
|
if (!width) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t *cursor = ((uint8_t *)source) + width;
|
// Next, ensure that it's not an uppercase character.
|
||||||
|
if (parser->encoding_changed) {
|
||||||
|
if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
|
||||||
|
} else {
|
||||||
|
if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next, iterate through all of the bytes of the string to ensure that they
|
||||||
|
// are all valid identifier characters.
|
||||||
|
const uint8_t *cursor = source + width;
|
||||||
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
|
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
|
||||||
cursor += width;
|
cursor += width;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Finally, validate that the identifier is not a keyword.
|
||||||
|
if (pm_local_is_keyword((const char *) source, length)) return false;
|
||||||
|
|
||||||
return cursor == source + length;
|
return cursor == source + length;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -18051,7 +18139,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
|
|||||||
|
|
||||||
// If the name of the capture group isn't a valid identifier, we do
|
// If the name of the capture group isn't a valid identifier, we do
|
||||||
// not add it to the local table.
|
// not add it to the local table.
|
||||||
if (!name_is_identifier(parser, source, length)) continue;
|
if (!parse_regular_expression_named_capture(parser, source, length)) continue;
|
||||||
|
|
||||||
if (content->type == PM_STRING_SHARED) {
|
if (content->type == PM_STRING_SHARED) {
|
||||||
// If the unescaped string is a slice of the source, then we can
|
// If the unescaped string is a slice of the source, then we can
|
||||||
|
@ -38,3 +38,7 @@ b>)/ =~ ""; ab
|
|||||||
|
|
||||||
a = 1
|
a = 1
|
||||||
tap { /(?<a>)/ =~ to_s }
|
tap { /(?<a>)/ =~ to_s }
|
||||||
|
|
||||||
|
/(?<foo>)/ =~ ""
|
||||||
|
/(?<Foo>)/ =~ ""
|
||||||
|
/(?<nil>)/ =~ ""
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
@ ProgramNode (location: (1,0)-(40,24))
|
@ ProgramNode (location: (1,0)-(44,16))
|
||||||
├── locals: [:foo, :ab, :abc, :a]
|
├── locals: [:foo, :ab, :abc, :a]
|
||||||
└── statements:
|
└── statements:
|
||||||
@ StatementsNode (location: (1,0)-(40,24))
|
@ StatementsNode (location: (1,0)-(44,16))
|
||||||
└── body: (length: 21)
|
└── body: (length: 24)
|
||||||
├── @ CallNode (location: (1,0)-(1,9))
|
├── @ CallNode (location: (1,0)-(1,9))
|
||||||
│ ├── flags: ignore_visibility
|
│ ├── flags: ignore_visibility
|
||||||
│ ├── receiver: ∅
|
│ ├── receiver: ∅
|
||||||
@ -316,56 +316,137 @@
|
|||||||
│ │ ├── flags: decimal
|
│ │ ├── flags: decimal
|
||||||
│ │ └── value: 1
|
│ │ └── value: 1
|
||||||
│ └── operator_loc: (39,2)-(39,3) = "="
|
│ └── operator_loc: (39,2)-(39,3) = "="
|
||||||
└── @ CallNode (location: (40,0)-(40,24))
|
├── @ CallNode (location: (40,0)-(40,24))
|
||||||
├── flags: ignore_visibility
|
│ ├── flags: ignore_visibility
|
||||||
├── receiver: ∅
|
│ ├── receiver: ∅
|
||||||
|
│ ├── call_operator_loc: ∅
|
||||||
|
│ ├── name: :tap
|
||||||
|
│ ├── message_loc: (40,0)-(40,3) = "tap"
|
||||||
|
│ ├── opening_loc: ∅
|
||||||
|
│ ├── arguments: ∅
|
||||||
|
│ ├── closing_loc: ∅
|
||||||
|
│ └── block:
|
||||||
|
│ @ BlockNode (location: (40,4)-(40,24))
|
||||||
|
│ ├── locals: []
|
||||||
|
│ ├── parameters: ∅
|
||||||
|
│ ├── body:
|
||||||
|
│ │ @ StatementsNode (location: (40,6)-(40,22))
|
||||||
|
│ │ └── body: (length: 1)
|
||||||
|
│ │ └── @ MatchWriteNode (location: (40,6)-(40,22))
|
||||||
|
│ │ ├── call:
|
||||||
|
│ │ │ @ CallNode (location: (40,6)-(40,22))
|
||||||
|
│ │ │ ├── flags: ∅
|
||||||
|
│ │ │ ├── receiver:
|
||||||
|
│ │ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
|
||||||
|
│ │ │ │ ├── flags: forced_us_ascii_encoding
|
||||||
|
│ │ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
|
||||||
|
│ │ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
|
||||||
|
│ │ │ │ ├── closing_loc: (40,13)-(40,14) = "/"
|
||||||
|
│ │ │ │ └── unescaped: "(?<a>)"
|
||||||
|
│ │ │ ├── call_operator_loc: ∅
|
||||||
|
│ │ │ ├── name: :=~
|
||||||
|
│ │ │ ├── message_loc: (40,15)-(40,17) = "=~"
|
||||||
|
│ │ │ ├── opening_loc: ∅
|
||||||
|
│ │ │ ├── arguments:
|
||||||
|
│ │ │ │ @ ArgumentsNode (location: (40,18)-(40,22))
|
||||||
|
│ │ │ │ ├── flags: ∅
|
||||||
|
│ │ │ │ └── arguments: (length: 1)
|
||||||
|
│ │ │ │ └── @ CallNode (location: (40,18)-(40,22))
|
||||||
|
│ │ │ │ ├── flags: variable_call, ignore_visibility
|
||||||
|
│ │ │ │ ├── receiver: ∅
|
||||||
|
│ │ │ │ ├── call_operator_loc: ∅
|
||||||
|
│ │ │ │ ├── name: :to_s
|
||||||
|
│ │ │ │ ├── message_loc: (40,18)-(40,22) = "to_s"
|
||||||
|
│ │ │ │ ├── opening_loc: ∅
|
||||||
|
│ │ │ │ ├── arguments: ∅
|
||||||
|
│ │ │ │ ├── closing_loc: ∅
|
||||||
|
│ │ │ │ └── block: ∅
|
||||||
|
│ │ │ ├── closing_loc: ∅
|
||||||
|
│ │ │ └── block: ∅
|
||||||
|
│ │ └── targets: (length: 1)
|
||||||
|
│ │ └── @ LocalVariableTargetNode (location: (40,10)-(40,11))
|
||||||
|
│ │ ├── name: :a
|
||||||
|
│ │ └── depth: 1
|
||||||
|
│ ├── opening_loc: (40,4)-(40,5) = "{"
|
||||||
|
│ └── closing_loc: (40,23)-(40,24) = "}"
|
||||||
|
├── @ MatchWriteNode (location: (42,0)-(42,16))
|
||||||
|
│ ├── call:
|
||||||
|
│ │ @ CallNode (location: (42,0)-(42,16))
|
||||||
|
│ │ ├── flags: ∅
|
||||||
|
│ │ ├── receiver:
|
||||||
|
│ │ │ @ RegularExpressionNode (location: (42,0)-(42,10))
|
||||||
|
│ │ │ ├── flags: forced_us_ascii_encoding
|
||||||
|
│ │ │ ├── opening_loc: (42,0)-(42,1) = "/"
|
||||||
|
│ │ │ ├── content_loc: (42,1)-(42,9) = "(?<foo>)"
|
||||||
|
│ │ │ ├── closing_loc: (42,9)-(42,10) = "/"
|
||||||
|
│ │ │ └── unescaped: "(?<foo>)"
|
||||||
|
│ │ ├── call_operator_loc: ∅
|
||||||
|
│ │ ├── name: :=~
|
||||||
|
│ │ ├── message_loc: (42,11)-(42,13) = "=~"
|
||||||
|
│ │ ├── opening_loc: ∅
|
||||||
|
│ │ ├── arguments:
|
||||||
|
│ │ │ @ ArgumentsNode (location: (42,14)-(42,16))
|
||||||
|
│ │ │ ├── flags: ∅
|
||||||
|
│ │ │ └── arguments: (length: 1)
|
||||||
|
│ │ │ └── @ StringNode (location: (42,14)-(42,16))
|
||||||
|
│ │ │ ├── flags: ∅
|
||||||
|
│ │ │ ├── opening_loc: (42,14)-(42,15) = "\""
|
||||||
|
│ │ │ ├── content_loc: (42,15)-(42,15) = ""
|
||||||
|
│ │ │ ├── closing_loc: (42,15)-(42,16) = "\""
|
||||||
|
│ │ │ └── unescaped: ""
|
||||||
|
│ │ ├── closing_loc: ∅
|
||||||
|
│ │ └── block: ∅
|
||||||
|
│ └── targets: (length: 1)
|
||||||
|
│ └── @ LocalVariableTargetNode (location: (42,4)-(42,7))
|
||||||
|
│ ├── name: :foo
|
||||||
|
│ └── depth: 0
|
||||||
|
├── @ CallNode (location: (43,0)-(43,16))
|
||||||
|
│ ├── flags: ∅
|
||||||
|
│ ├── receiver:
|
||||||
|
│ │ @ RegularExpressionNode (location: (43,0)-(43,10))
|
||||||
|
│ │ ├── flags: forced_us_ascii_encoding
|
||||||
|
│ │ ├── opening_loc: (43,0)-(43,1) = "/"
|
||||||
|
│ │ ├── content_loc: (43,1)-(43,9) = "(?<Foo>)"
|
||||||
|
│ │ ├── closing_loc: (43,9)-(43,10) = "/"
|
||||||
|
│ │ └── unescaped: "(?<Foo>)"
|
||||||
|
│ ├── call_operator_loc: ∅
|
||||||
|
│ ├── name: :=~
|
||||||
|
│ ├── message_loc: (43,11)-(43,13) = "=~"
|
||||||
|
│ ├── opening_loc: ∅
|
||||||
|
│ ├── arguments:
|
||||||
|
│ │ @ ArgumentsNode (location: (43,14)-(43,16))
|
||||||
|
│ │ ├── flags: ∅
|
||||||
|
│ │ └── arguments: (length: 1)
|
||||||
|
│ │ └── @ StringNode (location: (43,14)-(43,16))
|
||||||
|
│ │ ├── flags: ∅
|
||||||
|
│ │ ├── opening_loc: (43,14)-(43,15) = "\""
|
||||||
|
│ │ ├── content_loc: (43,15)-(43,15) = ""
|
||||||
|
│ │ ├── closing_loc: (43,15)-(43,16) = "\""
|
||||||
|
│ │ └── unescaped: ""
|
||||||
|
│ ├── closing_loc: ∅
|
||||||
|
│ └── block: ∅
|
||||||
|
└── @ CallNode (location: (44,0)-(44,16))
|
||||||
|
├── flags: ∅
|
||||||
|
├── receiver:
|
||||||
|
│ @ RegularExpressionNode (location: (44,0)-(44,10))
|
||||||
|
│ ├── flags: forced_us_ascii_encoding
|
||||||
|
│ ├── opening_loc: (44,0)-(44,1) = "/"
|
||||||
|
│ ├── content_loc: (44,1)-(44,9) = "(?<nil>)"
|
||||||
|
│ ├── closing_loc: (44,9)-(44,10) = "/"
|
||||||
|
│ └── unescaped: "(?<nil>)"
|
||||||
├── call_operator_loc: ∅
|
├── call_operator_loc: ∅
|
||||||
├── name: :tap
|
├── name: :=~
|
||||||
├── message_loc: (40,0)-(40,3) = "tap"
|
├── message_loc: (44,11)-(44,13) = "=~"
|
||||||
├── opening_loc: ∅
|
├── opening_loc: ∅
|
||||||
├── arguments: ∅
|
├── arguments:
|
||||||
|
│ @ ArgumentsNode (location: (44,14)-(44,16))
|
||||||
|
│ ├── flags: ∅
|
||||||
|
│ └── arguments: (length: 1)
|
||||||
|
│ └── @ StringNode (location: (44,14)-(44,16))
|
||||||
|
│ ├── flags: ∅
|
||||||
|
│ ├── opening_loc: (44,14)-(44,15) = "\""
|
||||||
|
│ ├── content_loc: (44,15)-(44,15) = ""
|
||||||
|
│ ├── closing_loc: (44,15)-(44,16) = "\""
|
||||||
|
│ └── unescaped: ""
|
||||||
├── closing_loc: ∅
|
├── closing_loc: ∅
|
||||||
└── block:
|
└── block: ∅
|
||||||
@ BlockNode (location: (40,4)-(40,24))
|
|
||||||
├── locals: []
|
|
||||||
├── parameters: ∅
|
|
||||||
├── body:
|
|
||||||
│ @ StatementsNode (location: (40,6)-(40,22))
|
|
||||||
│ └── body: (length: 1)
|
|
||||||
│ └── @ MatchWriteNode (location: (40,6)-(40,22))
|
|
||||||
│ ├── call:
|
|
||||||
│ │ @ CallNode (location: (40,6)-(40,22))
|
|
||||||
│ │ ├── flags: ∅
|
|
||||||
│ │ ├── receiver:
|
|
||||||
│ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
|
|
||||||
│ │ │ ├── flags: forced_us_ascii_encoding
|
|
||||||
│ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
|
|
||||||
│ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
|
|
||||||
│ │ │ ├── closing_loc: (40,13)-(40,14) = "/"
|
|
||||||
│ │ │ └── unescaped: "(?<a>)"
|
|
||||||
│ │ ├── call_operator_loc: ∅
|
|
||||||
│ │ ├── name: :=~
|
|
||||||
│ │ ├── message_loc: (40,15)-(40,17) = "=~"
|
|
||||||
│ │ ├── opening_loc: ∅
|
|
||||||
│ │ ├── arguments:
|
|
||||||
│ │ │ @ ArgumentsNode (location: (40,18)-(40,22))
|
|
||||||
│ │ │ ├── flags: ∅
|
|
||||||
│ │ │ └── arguments: (length: 1)
|
|
||||||
│ │ │ └── @ CallNode (location: (40,18)-(40,22))
|
|
||||||
│ │ │ ├── flags: variable_call, ignore_visibility
|
|
||||||
│ │ │ ├── receiver: ∅
|
|
||||||
│ │ │ ├── call_operator_loc: ∅
|
|
||||||
│ │ │ ├── name: :to_s
|
|
||||||
│ │ │ ├── message_loc: (40,18)-(40,22) = "to_s"
|
|
||||||
│ │ │ ├── opening_loc: ∅
|
|
||||||
│ │ │ ├── arguments: ∅
|
|
||||||
│ │ │ ├── closing_loc: ∅
|
|
||||||
│ │ │ └── block: ∅
|
|
||||||
│ │ ├── closing_loc: ∅
|
|
||||||
│ │ └── block: ∅
|
|
||||||
│ └── targets: (length: 1)
|
|
||||||
│ └── @ LocalVariableTargetNode (location: (40,10)-(40,11))
|
|
||||||
│ ├── name: :a
|
|
||||||
│ └── depth: 1
|
|
||||||
├── opening_loc: (40,4)-(40,5) = "{"
|
|
||||||
└── closing_loc: (40,23)-(40,24) = "}"
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user