[ruby/prism] Introduce char_is_identifier_utf8
https://github.com/ruby/prism/commit/5f43e57b0f
This commit is contained in:
parent
031e81c8f3
commit
32b5f5be7c
@ -5907,6 +5907,19 @@ char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Similar to char_is_identifier but this function assumes that the encoding
|
||||||
|
* has not been changed.
|
||||||
|
*/
|
||||||
|
static inline size_t
|
||||||
|
char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
|
||||||
|
if (*b < 0x80) {
|
||||||
|
return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
|
||||||
|
} else {
|
||||||
|
return (size_t) (pm_encoding_utf_8_alnum_char(b, end - b) || 1u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Like the above, this function is also used extremely frequently to lex all of
|
* Like the above, this function is also used extremely frequently to lex all of
|
||||||
* the identifiers in a source file once the first character has been found. So
|
* the identifiers in a source file once the first character has been found. So
|
||||||
@ -5925,11 +5938,8 @@ char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
|
|||||||
} else {
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
} else if (*b < 0x80) {
|
|
||||||
return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_');
|
|
||||||
} else {
|
|
||||||
return (size_t) (pm_encoding_utf_8_alnum_char(b, parser->end - b) || 1u);
|
|
||||||
}
|
}
|
||||||
|
return char_is_identifier_utf8(b, parser->end);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Here we're defining a perfect hash for the characters that are allowed in
|
// Here we're defining a perfect hash for the characters that are allowed in
|
||||||
@ -7003,9 +7013,16 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
|||||||
const uint8_t *end = parser->end;
|
const uint8_t *end = parser->end;
|
||||||
const uint8_t *current_start = parser->current.start;
|
const uint8_t *current_start = parser->current.start;
|
||||||
const uint8_t *current_end = parser->current.end;
|
const uint8_t *current_end = parser->current.end;
|
||||||
|
bool encoding_changed = parser->encoding_changed;
|
||||||
|
|
||||||
while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
|
if (encoding_changed) {
|
||||||
current_end += width;
|
while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
|
||||||
|
current_end += width;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
|
||||||
|
current_end += width;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
parser->current.end = current_end;
|
parser->current.end = current_end;
|
||||||
|
|
||||||
@ -7123,7 +7140,7 @@ lex_identifier(pm_parser_t *parser, bool previous_command_start) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (parser->encoding_changed) {
|
if (encoding_changed) {
|
||||||
return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
return parser->encoding.isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
||||||
}
|
}
|
||||||
return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user