[ruby/prism] Introduce char_is_identifier_utf8
https://github.com/ruby/prism/commit/5f43e57b0f
This commit is contained in:
parent
031e81c8f3
commit
32b5f5be7c
@ -5907,6 +5907,19 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Similar to char_is_identifier but this function assumes that the encoding
|
||||
* has not been changed.
|
||||
*/
|
||||
static inline size_t
|
||||
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
||||
if (*b < 0x80) {
|
||||
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
||||
} else {
|
||||
return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Like the above, this function is also used extremely frequently to lex all of
|
||||
* the identifiers in a source file once the first character has been found. So
|
||||
@ -5925,11 +5938,8 @@ char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else if (*b < 0x80) {
|
||||
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
|
||||
} else {
|
||||
return (size_t) (pm_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
|
||||
}
|
||||
return char_is_identifier_utf8(b, parser->end);
|
||||
}
|
||||
|
||||
// Here we're defining a perfect hash for the characters that are allowed in
|
||||
@ -7003,10 +7013,17 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
||||
const uint8_t *end = parser->end;
|
||||
const uint8_t *current_start = parser->current.start;
|
||||
const uint8_t *current_end = parser->current.end;
|
||||
bool encoding_changed = parser->encoding_changed;
|
||||
|
||||
if (encoding_changed) {
|
||||
while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
|
||||
current_end += width;
|
||||
}
|
||||
} else {
|
||||
while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
|
||||
current_end += width;
|
||||
}
|
||||
}
|
||||
parser->current.end = current_end;
|
||||
|
||||
// Now cache the length of the identifier so that we can quickly compare it
|
||||
@ -7123,7 +7140,7 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
||||
}
|
||||
}
|
||||
|
||||
if (parser->encoding_changed) {
|
||||
if (encoding_changed) {
|
||||
return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
||||
}
|
||||
return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
||||
|
Loading…
x
Reference in New Issue
Block a user