[ruby/prism] Reject invalid capture groups (keywords)
https://github.com/ruby/prism/commit/bb78d83e88
This commit is contained in:
parent
bb3cbdfe2f
commit
8780059c38
110
prism/prism.c
110
prism/prism.c
@ -1184,6 +1184,77 @@ token_is_setter_name(pm_token_t *token) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Returns true if the given local variable name is a keyword.
 *
 * The name is described by a pointer and a byte count, so it does not need to
 * be NUL-terminated. Only the first `length` bytes of `source` are examined.
 *
 * @param source Pointer to the first byte of the candidate name.
 * @param length Number of bytes in the candidate name.
 * @return true if those bytes exactly spell one of the keywords listed in the
 *     body of this function, false otherwise (including when length is 0).
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
// Returns true from the enclosing function if the candidate matches the given
// string literal. The length of the literal is checked explicitly so that a
// keyword accidentally listed under the wrong `case` can never make memcmp
// read past the end of the literal. The do/while wrapper makes the macro
// behave as a single statement.
#define KEYWORD(name) \
    do { \
        if (length == sizeof(name) - 1 && memcmp(source, name, sizeof(name) - 1) == 0) return true; \
    } while (0)

    // Dispatch first on the length and then on the first byte so that at most
    // two comparisons are performed for any candidate.
    switch (length) {
        case 2:
            switch (source[0]) {
                case 'd': KEYWORD("do"); return false;
                case 'i': KEYWORD("if"); KEYWORD("in"); return false;
                case 'o': KEYWORD("or"); return false;
                default: return false;
            }
        case 3:
            switch (source[0]) {
                case 'a': KEYWORD("and"); return false;
                case 'd': KEYWORD("def"); return false;
                case 'e': KEYWORD("end"); return false;
                case 'f': KEYWORD("for"); return false;
                case 'n': KEYWORD("nil"); KEYWORD("not"); return false;
                default: return false;
            }
        case 4:
            switch (source[0]) {
                case 'c': KEYWORD("case"); return false;
                case 'e': KEYWORD("else"); return false;
                case 'n': KEYWORD("next"); return false;
                case 'r': KEYWORD("redo"); return false;
                case 's': KEYWORD("self"); return false;
                case 't': KEYWORD("then"); KEYWORD("true"); return false;
                case 'w': KEYWORD("when"); return false;
                default: return false;
            }
        case 5:
            switch (source[0]) {
                case 'a': KEYWORD("alias"); return false;
                case 'b': KEYWORD("begin"); KEYWORD("break"); return false;
                case 'c': KEYWORD("class"); return false;
                case 'e': KEYWORD("elsif"); return false;
                case 'f': KEYWORD("false"); return false;
                case 'r': KEYWORD("retry"); return false;
                case 's': KEYWORD("super"); return false;
                case 'u': KEYWORD("undef"); KEYWORD("until"); return false;
                case 'w': KEYWORD("while"); return false;
                case 'y': KEYWORD("yield"); return false;
                default: return false;
            }
        case 6:
            switch (source[0]) {
                case 'e': KEYWORD("ensure"); return false;
                case 'm': KEYWORD("module"); return false;
                case 'r': KEYWORD("rescue"); KEYWORD("return"); return false;
                case 'u': KEYWORD("unless"); return false;
                default: return false;
            }
        case 8:
            KEYWORD("__LINE__");
            KEYWORD("__FILE__");
            return false;
        case 12:
            KEYWORD("__ENCODING__");
            return false;
        default:
            return false;
    }

#undef KEYWORD
}
|
||||
|
||||
/******************************************************************************/
|
||||
/* Node flag handling functions */
|
||||
/******************************************************************************/
|
||||
@ -10576,19 +10647,19 @@ parser_lex(pm_parser_t *parser) {
|
||||
|
||||
pm_token_type_t type = lex_identifier(parser, previous_command_start);
|
||||
|
||||
// If we've hit a __END__ and it was at the start of the line or the
|
||||
// start of the file and it is followed by either a \n or a \r\n, then
|
||||
// this is the last token of the file.
|
||||
// If we've hit a __END__ and it was at the start of the
|
||||
// line or the start of the file and it is followed by
|
||||
// either a \n or a \r\n, then this is the last token of the
|
||||
// file.
|
||||
if (
|
||||
((parser->current.end - parser->current.start) == 7) &&
|
||||
current_token_starts_line(parser) &&
|
||||
(memcmp(parser->current.start, "__END__", 7) == 0) &&
|
||||
(parser->current.end == parser->end || match_eol(parser))
|
||||
)
|
||||
{
|
||||
// Since we know we're about to add an __END__ comment, we know we
|
||||
// need to add all of the newlines to get the correct column
|
||||
// information for it.
|
||||
) {
|
||||
// Since we know we're about to add an __END__ comment,
|
||||
// we know we need to add all of the newlines to get the
|
||||
// correct column information for it.
|
||||
const uint8_t *cursor = parser->current.end;
|
||||
while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
|
||||
pm_newline_list_append(&parser->newline_list, cursor++);
|
||||
@ -18006,22 +18077,39 @@ parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the name of the capture group is a valid local variable that
|
||||
* can be written to.
|
||||
*/
|
||||
static bool
|
||||
name_is_identifier(pm_parser_t *parser, const uint8_t *source, size_t length) {
|
||||
parse_regular_expression_named_capture(pm_parser_t *parser, const uint8_t *source, size_t length) {
|
||||
if (length == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// First ensure that it starts with a valid identifier starting character.
|
||||
size_t width = char_is_identifier_start(parser, source);
|
||||
if (!width) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t *cursor = ((uint8_t *)source) + width;
|
||||
// Next, ensure that it's not an uppercase character.
|
||||
if (parser->encoding_changed) {
|
||||
if (parser->encoding->isupper_char(source, (ptrdiff_t) length)) return false;
|
||||
} else {
|
||||
if (pm_encoding_utf_8_isupper_char(source, (ptrdiff_t) length)) return false;
|
||||
}
|
||||
|
||||
// Next, iterate through all of the bytes of the string to ensure that they
|
||||
// are all valid identifier characters.
|
||||
const uint8_t *cursor = source + width;
|
||||
while (cursor < source + length && (width = char_is_identifier(parser, cursor))) {
|
||||
cursor += width;
|
||||
}
|
||||
|
||||
// Finally, validate that the identifier is not a keyword.
|
||||
if (pm_local_is_keyword((const char *) source, length)) return false;
|
||||
|
||||
return cursor == source + length;
|
||||
}
|
||||
|
||||
@ -18051,7 +18139,7 @@ parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *
|
||||
|
||||
// If the name of the capture group isn't a valid identifier, we do
|
||||
// not add it to the local table.
|
||||
if (!name_is_identifier(parser, source, length)) continue;
|
||||
if (!parse_regular_expression_named_capture(parser, source, length)) continue;
|
||||
|
||||
if (content->type == PM_STRING_SHARED) {
|
||||
// If the unescaped string is a slice of the source, then we can
|
||||
|
@ -38,3 +38,7 @@ b>)/ =~ ""; ab
|
||||
|
||||
a = 1
|
||||
tap { /(?<a>)/ =~ to_s }
|
||||
|
||||
/(?<foo>)/ =~ ""
|
||||
/(?<Foo>)/ =~ ""
|
||||
/(?<nil>)/ =~ ""
|
||||
|
@ -1,8 +1,8 @@
|
||||
@ ProgramNode (location: (1,0)-(40,24))
|
||||
@ ProgramNode (location: (1,0)-(44,16))
|
||||
├── locals: [:foo, :ab, :abc, :a]
|
||||
└── statements:
|
||||
@ StatementsNode (location: (1,0)-(40,24))
|
||||
└── body: (length: 21)
|
||||
@ StatementsNode (location: (1,0)-(44,16))
|
||||
└── body: (length: 24)
|
||||
├── @ CallNode (location: (1,0)-(1,9))
|
||||
│ ├── flags: ignore_visibility
|
||||
│ ├── receiver: ∅
|
||||
@ -316,56 +316,137 @@
|
||||
│ │ ├── flags: decimal
|
||||
│ │ └── value: 1
|
||||
│ └── operator_loc: (39,2)-(39,3) = "="
|
||||
└── @ CallNode (location: (40,0)-(40,24))
|
||||
├── flags: ignore_visibility
|
||||
├── receiver: ∅
|
||||
├── @ CallNode (location: (40,0)-(40,24))
|
||||
│ ├── flags: ignore_visibility
|
||||
│ ├── receiver: ∅
|
||||
│ ├── call_operator_loc: ∅
|
||||
│ ├── name: :tap
|
||||
│ ├── message_loc: (40,0)-(40,3) = "tap"
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── arguments: ∅
|
||||
│ ├── closing_loc: ∅
|
||||
│ └── block:
|
||||
│ @ BlockNode (location: (40,4)-(40,24))
|
||||
│ ├── locals: []
|
||||
│ ├── parameters: ∅
|
||||
│ ├── body:
|
||||
│ │ @ StatementsNode (location: (40,6)-(40,22))
|
||||
│ │ └── body: (length: 1)
|
||||
│ │ └── @ MatchWriteNode (location: (40,6)-(40,22))
|
||||
│ │ ├── call:
|
||||
│ │ │ @ CallNode (location: (40,6)-(40,22))
|
||||
│ │ │ ├── flags: ∅
|
||||
│ │ │ ├── receiver:
|
||||
│ │ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
|
||||
│ │ │ │ ├── flags: forced_us_ascii_encoding
|
||||
│ │ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
|
||||
│ │ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
|
||||
│ │ │ │ ├── closing_loc: (40,13)-(40,14) = "/"
|
||||
│ │ │ │ └── unescaped: "(?<a>)"
|
||||
│ │ │ ├── call_operator_loc: ∅
|
||||
│ │ │ ├── name: :=~
|
||||
│ │ │ ├── message_loc: (40,15)-(40,17) = "=~"
|
||||
│ │ │ ├── opening_loc: ∅
|
||||
│ │ │ ├── arguments:
|
||||
│ │ │ │ @ ArgumentsNode (location: (40,18)-(40,22))
|
||||
│ │ │ │ ├── flags: ∅
|
||||
│ │ │ │ └── arguments: (length: 1)
|
||||
│ │ │ │ └── @ CallNode (location: (40,18)-(40,22))
|
||||
│ │ │ │ ├── flags: variable_call, ignore_visibility
|
||||
│ │ │ │ ├── receiver: ∅
|
||||
│ │ │ │ ├── call_operator_loc: ∅
|
||||
│ │ │ │ ├── name: :to_s
|
||||
│ │ │ │ ├── message_loc: (40,18)-(40,22) = "to_s"
|
||||
│ │ │ │ ├── opening_loc: ∅
|
||||
│ │ │ │ ├── arguments: ∅
|
||||
│ │ │ │ ├── closing_loc: ∅
|
||||
│ │ │ │ └── block: ∅
|
||||
│ │ │ ├── closing_loc: ∅
|
||||
│ │ │ └── block: ∅
|
||||
│ │ └── targets: (length: 1)
|
||||
│ │ └── @ LocalVariableTargetNode (location: (40,10)-(40,11))
|
||||
│ │ ├── name: :a
|
||||
│ │ └── depth: 1
|
||||
│ ├── opening_loc: (40,4)-(40,5) = "{"
|
||||
│ └── closing_loc: (40,23)-(40,24) = "}"
|
||||
├── @ MatchWriteNode (location: (42,0)-(42,16))
|
||||
│ ├── call:
|
||||
│ │ @ CallNode (location: (42,0)-(42,16))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── receiver:
|
||||
│ │ │ @ RegularExpressionNode (location: (42,0)-(42,10))
|
||||
│ │ │ ├── flags: forced_us_ascii_encoding
|
||||
│ │ │ ├── opening_loc: (42,0)-(42,1) = "/"
|
||||
│ │ │ ├── content_loc: (42,1)-(42,9) = "(?<foo>)"
|
||||
│ │ │ ├── closing_loc: (42,9)-(42,10) = "/"
|
||||
│ │ │ └── unescaped: "(?<foo>)"
|
||||
│ │ ├── call_operator_loc: ∅
|
||||
│ │ ├── name: :=~
|
||||
│ │ ├── message_loc: (42,11)-(42,13) = "=~"
|
||||
│ │ ├── opening_loc: ∅
|
||||
│ │ ├── arguments:
|
||||
│ │ │ @ ArgumentsNode (location: (42,14)-(42,16))
|
||||
│ │ │ ├── flags: ∅
|
||||
│ │ │ └── arguments: (length: 1)
|
||||
│ │ │ └── @ StringNode (location: (42,14)-(42,16))
|
||||
│ │ │ ├── flags: ∅
|
||||
│ │ │ ├── opening_loc: (42,14)-(42,15) = "\""
|
||||
│ │ │ ├── content_loc: (42,15)-(42,15) = ""
|
||||
│ │ │ ├── closing_loc: (42,15)-(42,16) = "\""
|
||||
│ │ │ └── unescaped: ""
|
||||
│ │ ├── closing_loc: ∅
|
||||
│ │ └── block: ∅
|
||||
│ └── targets: (length: 1)
|
||||
│ └── @ LocalVariableTargetNode (location: (42,4)-(42,7))
|
||||
│ ├── name: :foo
|
||||
│ └── depth: 0
|
||||
├── @ CallNode (location: (43,0)-(43,16))
|
||||
│ ├── flags: ∅
|
||||
│ ├── receiver:
|
||||
│ │ @ RegularExpressionNode (location: (43,0)-(43,10))
|
||||
│ │ ├── flags: forced_us_ascii_encoding
|
||||
│ │ ├── opening_loc: (43,0)-(43,1) = "/"
|
||||
│ │ ├── content_loc: (43,1)-(43,9) = "(?<Foo>)"
|
||||
│ │ ├── closing_loc: (43,9)-(43,10) = "/"
|
||||
│ │ └── unescaped: "(?<Foo>)"
|
||||
│ ├── call_operator_loc: ∅
|
||||
│ ├── name: :=~
|
||||
│ ├── message_loc: (43,11)-(43,13) = "=~"
|
||||
│ ├── opening_loc: ∅
|
||||
│ ├── arguments:
|
||||
│ │ @ ArgumentsNode (location: (43,14)-(43,16))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ └── arguments: (length: 1)
|
||||
│ │ └── @ StringNode (location: (43,14)-(43,16))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── opening_loc: (43,14)-(43,15) = "\""
|
||||
│ │ ├── content_loc: (43,15)-(43,15) = ""
|
||||
│ │ ├── closing_loc: (43,15)-(43,16) = "\""
|
||||
│ │ └── unescaped: ""
|
||||
│ ├── closing_loc: ∅
|
||||
│ └── block: ∅
|
||||
└── @ CallNode (location: (44,0)-(44,16))
|
||||
├── flags: ∅
|
||||
├── receiver:
|
||||
│ @ RegularExpressionNode (location: (44,0)-(44,10))
|
||||
│ ├── flags: forced_us_ascii_encoding
|
||||
│ ├── opening_loc: (44,0)-(44,1) = "/"
|
||||
│ ├── content_loc: (44,1)-(44,9) = "(?<nil>)"
|
||||
│ ├── closing_loc: (44,9)-(44,10) = "/"
|
||||
│ └── unescaped: "(?<nil>)"
|
||||
├── call_operator_loc: ∅
|
||||
├── name: :tap
|
||||
├── message_loc: (40,0)-(40,3) = "tap"
|
||||
├── name: :=~
|
||||
├── message_loc: (44,11)-(44,13) = "=~"
|
||||
├── opening_loc: ∅
|
||||
├── arguments: ∅
|
||||
├── arguments:
|
||||
│ @ ArgumentsNode (location: (44,14)-(44,16))
|
||||
│ ├── flags: ∅
|
||||
│ └── arguments: (length: 1)
|
||||
│ └── @ StringNode (location: (44,14)-(44,16))
|
||||
│ ├── flags: ∅
|
||||
│ ├── opening_loc: (44,14)-(44,15) = "\""
|
||||
│ ├── content_loc: (44,15)-(44,15) = ""
|
||||
│ ├── closing_loc: (44,15)-(44,16) = "\""
|
||||
│ └── unescaped: ""
|
||||
├── closing_loc: ∅
|
||||
└── block:
|
||||
@ BlockNode (location: (40,4)-(40,24))
|
||||
├── locals: []
|
||||
├── parameters: ∅
|
||||
├── body:
|
||||
│ @ StatementsNode (location: (40,6)-(40,22))
|
||||
│ └── body: (length: 1)
|
||||
│ └── @ MatchWriteNode (location: (40,6)-(40,22))
|
||||
│ ├── call:
|
||||
│ │ @ CallNode (location: (40,6)-(40,22))
|
||||
│ │ ├── flags: ∅
|
||||
│ │ ├── receiver:
|
||||
│ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
|
||||
│ │ │ ├── flags: forced_us_ascii_encoding
|
||||
│ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
|
||||
│ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
|
||||
│ │ │ ├── closing_loc: (40,13)-(40,14) = "/"
|
||||
│ │ │ └── unescaped: "(?<a>)"
|
||||
│ │ ├── call_operator_loc: ∅
|
||||
│ │ ├── name: :=~
|
||||
│ │ ├── message_loc: (40,15)-(40,17) = "=~"
|
||||
│ │ ├── opening_loc: ∅
|
||||
│ │ ├── arguments:
|
||||
│ │ │ @ ArgumentsNode (location: (40,18)-(40,22))
|
||||
│ │ │ ├── flags: ∅
|
||||
│ │ │ └── arguments: (length: 1)
|
||||
│ │ │ └── @ CallNode (location: (40,18)-(40,22))
|
||||
│ │ │ ├── flags: variable_call, ignore_visibility
|
||||
│ │ │ ├── receiver: ∅
|
||||
│ │ │ ├── call_operator_loc: ∅
|
||||
│ │ │ ├── name: :to_s
|
||||
│ │ │ ├── message_loc: (40,18)-(40,22) = "to_s"
|
||||
│ │ │ ├── opening_loc: ∅
|
||||
│ │ │ ├── arguments: ∅
|
||||
│ │ │ ├── closing_loc: ∅
|
||||
│ │ │ └── block: ∅
|
||||
│ │ ├── closing_loc: ∅
|
||||
│ │ └── block: ∅
|
||||
│ └── targets: (length: 1)
|
||||
│ └── @ LocalVariableTargetNode (location: (40,10)-(40,11))
|
||||
│ ├── name: :a
|
||||
│ └── depth: 1
|
||||
├── opening_loc: (40,4)-(40,5) = "{"
|
||||
└── closing_loc: (40,23)-(40,24) = "}"
|
||||
└── block: ∅
|
||||
|
Loading…
x
Reference in New Issue
Block a user