[ruby/prism] Faster lex_identifier
https://github.com/ruby/prism/commit/e44a9ae742
This commit is contained in:
parent
2ab247d217
commit
3a21da9591
@ -57,6 +57,7 @@ bool pm_encoding_ascii_isupper_char(const uint8_t *b, PRISM_ATTRIBUTE_UNUSED ptr
|
|||||||
// the parser so they need to be internally visible.
|
// the parser so they need to be internally visible.
|
||||||
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
|
size_t pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
|
||||||
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
|
size_t pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
|
||||||
|
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
|
||||||
|
|
||||||
// This lookup table is referenced in both the UTF-8 encoding file and the
|
// This lookup table is referenced in both the UTF-8 encoding file and the
|
||||||
// parser directly in order to speed up the default encoding processing.
|
// parser directly in order to speed up the default encoding processing.
|
||||||
|
@ -2285,7 +2285,7 @@ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
bool
|
||||||
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
if (*b < 0x80) {
|
if (*b < 0x80) {
|
||||||
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
|
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
|
||||||
|
@ -6044,16 +6044,21 @@ static pm_token_type_t
|
|||||||
lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
||||||
// Lex as far as we can into the current identifier.
|
// Lex as far as we can into the current identifier.
|
||||||
size_t width;
|
size_t width;
|
||||||
while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
|
const uint8_t *end = parser->end;
|
||||||
parser->current.end += width;
|
const uint8_t *current_start = parser->current.start;
|
||||||
|
const uint8_t *current_end = parser->current.end;
|
||||||
|
|
||||||
|
while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
|
||||||
|
current_end += width;
|
||||||
}
|
}
|
||||||
|
parser->current.end = current_end;
|
||||||
|
|
||||||
// Now cache the length of the identifier so that we can quickly compare it
|
// Now cache the length of the identifier so that we can quickly compare it
|
||||||
// against known keywords.
|
// against known keywords.
|
||||||
width = (size_t) (parser->current.end - parser->current.start);
|
width = (size_t) (current_end - current_start);
|
||||||
|
|
||||||
if (parser->current.end < parser->end) {
|
if (current_end < end) {
|
||||||
if (((parser->current.end + 1 >= parser->end) || (parser->current.end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
|
if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
|
||||||
// First we'll attempt to extend the identifier by a ! or ?. Then we'll
|
// First we'll attempt to extend the identifier by a ! or ?. Then we'll
|
||||||
// check if we're returning the defined? keyword or just an identifier.
|
// check if we're returning the defined? keyword or just an identifier.
|
||||||
width++;
|
width++;
|
||||||
@ -6163,7 +6168,10 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return parser->encoding.isupper_char(parser->current.start, parser->end - parser->current.start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
if (parser->encoding_changed) {
|
||||||
|
return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
||||||
|
}
|
||||||
|
return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if the current token that the parser is considering is at the
|
// Returns true if the current token that the parser is considering is at the
|
||||||
|
Loading…
x
Reference in New Issue
Block a user