[ruby/yarp] A couple of small stylistic changes
* `le_len` to `eol_length`
* Braces on the same line as switch case
* `peek_addr` -> `peek_at`
* `peek_at` -> `peek_offset`
* `match_line_ending_addr` -> `match_eol_at`
* `match_line_ending_at` -> `match_eol_offset`

https://github.com/ruby/yarp/commit/d7ffa9e64e
This commit is contained in:
parent
432702a427
commit
791d572ba8
220
yarp/yarp.c
220
yarp/yarp.c
@ -4157,10 +4157,10 @@ yp_do_loop_stack_p(yp_parser_t *parser) {
|
||||
/* Lexer check helpers */
|
||||
/******************************************************************************/
|
||||
|
||||
// Get the next character in the source starting from +cursor+. If that position is beyond the end
|
||||
// of the source then return '\0'.
|
||||
// Get the next character in the source starting from +cursor+. If that position
|
||||
// is beyond the end of the source then return '\0'.
|
||||
static inline char
|
||||
peek_addr(yp_parser_t *parser, const char *cursor) {
|
||||
peek_at(yp_parser_t *parser, const char *cursor) {
|
||||
if (cursor < parser->end) {
|
||||
return *cursor;
|
||||
} else {
|
||||
@ -4172,15 +4172,15 @@ peek_addr(yp_parser_t *parser, const char *cursor) {
|
||||
// adding the given offset. If that position is beyond the end of the source
|
||||
// then return '\0'.
|
||||
static inline char
|
||||
peek_at(yp_parser_t *parser, ptrdiff_t offset) {
|
||||
return peek_addr(parser, parser->current.end + offset);
|
||||
peek_offset(yp_parser_t *parser, ptrdiff_t offset) {
|
||||
return peek_at(parser, parser->current.end + offset);
|
||||
}
|
||||
|
||||
// Get the next character in the source starting from parser->current.end. If
|
||||
// that position is beyond the end of the source then return '\0'.
|
||||
static inline char
|
||||
peek(yp_parser_t *parser) {
|
||||
return peek_addr(parser, parser->current.end);
|
||||
return peek_at(parser, parser->current.end);
|
||||
}
|
||||
|
||||
// Get the next string of length len in the source starting from parser->current.end.
|
||||
@ -4205,6 +4205,35 @@ match(yp_parser_t *parser, char value) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return the length of the line ending string starting at +cursor+, or 0 if it
|
||||
// is not a line ending. This function is intended to be CRLF/LF agnostic.
|
||||
static inline size_t
|
||||
match_eol_at(yp_parser_t *parser, const char *cursor) {
|
||||
if (peek_at(parser, cursor) == '\n') {
|
||||
return 1;
|
||||
}
|
||||
if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return the length of the line ending string starting at
|
||||
// parser->current.end + offset, or 0 if it is not a line ending. This function
|
||||
// is intended to be CRLF/LF agnostic.
|
||||
static inline size_t
|
||||
match_eol_offset(yp_parser_t *parser, ptrdiff_t offset) {
|
||||
return match_eol_at(parser, parser->current.end + offset);
|
||||
}
|
||||
|
||||
// Return the length of the line ending string starting at parser->current.end,
|
||||
// or 0 if it is not a line ending. This function is intended to be CRLF/LF
|
||||
// agnostic.
|
||||
static inline size_t
|
||||
match_eol(yp_parser_t *parser) {
|
||||
return match_eol_at(parser, parser->current.end);
|
||||
}
|
||||
|
||||
// Skip to the next newline character or NUL byte.
|
||||
static inline const char *
|
||||
next_newline(const char *cursor, ptrdiff_t length) {
|
||||
@ -4216,33 +4245,6 @@ next_newline(const char *cursor, ptrdiff_t length) {
|
||||
return memchr(cursor, '\n', (size_t) length);
|
||||
}
|
||||
|
||||
// Return the length of the line ending string starting at +cursor+, or 0 if it is not a line
|
||||
// ending. This function is intended to be CRLF/LF agnostic.
|
||||
static inline size_t
|
||||
match_line_ending_addr(yp_parser_t *parser, const char *cursor) {
|
||||
if (peek_addr(parser, cursor) == '\n') {
|
||||
return 1;
|
||||
}
|
||||
if (peek_addr(parser, cursor) == '\r' && peek_addr(parser, cursor + 1) == '\n') {
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return the length of the line ending string starting at parser->current.end + offset, or 0 if it
|
||||
// is not a line ending. This function is intended to be CRLF/LF agnostic.
|
||||
static inline size_t
|
||||
match_line_ending_at(yp_parser_t *parser, ptrdiff_t offset) {
|
||||
return match_line_ending_addr(parser, parser->current.end + offset);
|
||||
}
|
||||
|
||||
// Return the length of the line ending string starting at parser->current.end, or 0 if it is not a
|
||||
// line ending. This function is intended to be CRLF/LF agnostic.
|
||||
static inline size_t
|
||||
match_line_ending(yp_parser_t *parser) {
|
||||
return match_line_ending_addr(parser, parser->current.end);
|
||||
}
|
||||
|
||||
// Find the start of the encoding comment. This is effectively an inlined
|
||||
// version of strnstr with some modifications.
|
||||
static inline const char *
|
||||
@ -4515,7 +4517,7 @@ lex_optional_float_suffix(yp_parser_t *parser) {
|
||||
// Here we're going to attempt to parse the optional decimal portion of a
|
||||
// float. If it's not there, then it's okay and we'll just continue on.
|
||||
if (peek(parser) == '.') {
|
||||
if (yp_char_is_decimal_digit(peek_at(parser, 1))) {
|
||||
if (yp_char_is_decimal_digit(peek_offset(parser, 1))) {
|
||||
parser->current.end += 2;
|
||||
parser->current.end += yp_strspn_decimal_number(parser->current.end, parser->end - parser->current.end);
|
||||
type = YP_TOKEN_FLOAT;
|
||||
@ -4548,7 +4550,7 @@ static yp_token_type_t
|
||||
lex_numeric_prefix(yp_parser_t *parser) {
|
||||
yp_token_type_t type = YP_TOKEN_INTEGER;
|
||||
|
||||
if (peek_at(parser, -1) == '0') {
|
||||
if (peek_offset(parser, -1) == '0') {
|
||||
switch (*parser->current.end) {
|
||||
// 0d1111 is a decimal number
|
||||
case 'd':
|
||||
@ -4631,7 +4633,7 @@ lex_numeric_prefix(yp_parser_t *parser) {
|
||||
|
||||
// If the last character that we consumed was an underscore, then this is
|
||||
// actually an invalid integer value, and we should return an invalid token.
|
||||
if (peek_at(parser, -1) == '_') {
|
||||
if (peek_offset(parser, -1) == '_') {
|
||||
yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, "Number literal cannot end with a `_`.");
|
||||
}
|
||||
|
||||
@ -4812,7 +4814,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
|
||||
|
||||
if (
|
||||
((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
|
||||
(peek(parser) == ':') && (peek_at(parser, 1) != ':')
|
||||
(peek(parser) == ':') && (peek_offset(parser, 1) != ':')
|
||||
) {
|
||||
// If we're in a position where we can accept a : at the end of an
|
||||
// identifier, then we'll optionally accept it.
|
||||
@ -4828,7 +4830,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
|
||||
}
|
||||
|
||||
return YP_TOKEN_IDENTIFIER;
|
||||
} else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_at(parser, 1) != '~' && peek_at(parser, 1) != '>' && (peek_at(parser, 1) != '=' || peek_at(parser, 2) == '>') && match(parser, '=')) {
|
||||
} else if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
|
||||
// If we're in a position where we can accept a = at the end of an
|
||||
// identifier, then we'll optionally accept it.
|
||||
return YP_TOKEN_IDENTIFIER;
|
||||
@ -4836,7 +4838,7 @@ lex_identifier(yp_parser_t *parser, bool previous_command_start) {
|
||||
|
||||
if (
|
||||
((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
|
||||
peek(parser) == ':' && peek_at(parser, 1) != ':'
|
||||
peek(parser) == ':' && peek_offset(parser, 1) != ':'
|
||||
) {
|
||||
// If we're in a position where we can accept a : at the end of an
|
||||
// identifier, then we'll optionally accept it.
|
||||
@ -5329,32 +5331,32 @@ parser_lex(yp_parser_t *parser) {
|
||||
space_seen = true;
|
||||
break;
|
||||
case '\r':
|
||||
if (match_line_ending_at(parser, 1)) {
|
||||
if (match_eol_offset(parser, 1)) {
|
||||
chomping = false;
|
||||
} else {
|
||||
parser->current.end++;
|
||||
space_seen = true;
|
||||
}
|
||||
break;
|
||||
case '\\':
|
||||
{
|
||||
size_t le_len = match_line_ending_at(parser, 1);
|
||||
if (le_len) {
|
||||
if (parser->heredoc_end) {
|
||||
parser->current.end = parser->heredoc_end;
|
||||
parser->heredoc_end = NULL;
|
||||
} else {
|
||||
parser->current.end += le_len + 1;
|
||||
yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
||||
space_seen = true;
|
||||
}
|
||||
} else if (yp_char_is_inline_whitespace(*parser->current.end)) {
|
||||
parser->current.end += 2;
|
||||
case '\\': {
|
||||
size_t eol_length = match_eol_offset(parser, 1);
|
||||
if (eol_length) {
|
||||
if (parser->heredoc_end) {
|
||||
parser->current.end = parser->heredoc_end;
|
||||
parser->heredoc_end = NULL;
|
||||
} else {
|
||||
chomping = false;
|
||||
parser->current.end += eol_length + 1;
|
||||
yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
||||
space_seen = true;
|
||||
}
|
||||
} else if (yp_char_is_inline_whitespace(*parser->current.end)) {
|
||||
parser->current.end += 2;
|
||||
} else {
|
||||
chomping = false;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
chomping = false;
|
||||
break;
|
||||
@ -5364,13 +5366,14 @@ parser_lex(yp_parser_t *parser) {
|
||||
// Next, we'll set to start of this token to be the current end.
|
||||
parser->current.start = parser->current.end;
|
||||
|
||||
// We'll check if we're at the end of the file. If we are, then we need to
|
||||
// return the EOF token.
|
||||
// We'll check if we're at the end of the file. If we are, then we
|
||||
// need to return the EOF token.
|
||||
if (parser->current.end >= parser->end) {
|
||||
LEX(YP_TOKEN_EOF);
|
||||
}
|
||||
|
||||
// Finally, we'll check the current character to determine the next token.
|
||||
// Finally, we'll check the current character to determine the next
|
||||
// token.
|
||||
switch (*parser->current.end++) {
|
||||
case '\0': // NUL or end of script
|
||||
case '\004': // ^D
|
||||
@ -5385,8 +5388,9 @@ parser_lex(yp_parser_t *parser) {
|
||||
parser->current.type = YP_TOKEN_COMMENT;
|
||||
parser_lex_callback(parser);
|
||||
|
||||
// If we found a comment while lexing, then we're going to add it to the
|
||||
// list of comments in the file and keep lexing.
|
||||
// If we found a comment while lexing, then we're going to
|
||||
// add it to the list of comments in the file and keep
|
||||
// lexing.
|
||||
yp_comment_t *comment = parser_comment(parser, YP_COMMENT_INLINE);
|
||||
yp_list_append(&parser->comment_list, (yp_list_node_t *) comment);
|
||||
|
||||
@ -5398,18 +5402,21 @@ parser_lex(yp_parser_t *parser) {
|
||||
}
|
||||
/* fallthrough */
|
||||
case '\r':
|
||||
case '\n':
|
||||
{
|
||||
size_t le_len = match_line_ending_addr(parser, parser->current.end - 1);
|
||||
if (le_len) {
|
||||
// The only way you can have carriage returns in this particular loop
|
||||
// is if you have a carriage return followed by a newline. In that
|
||||
// case we'll just skip over the carriage return and continue lexing,
|
||||
// in order to make it so that the newline token encapsulates both the
|
||||
// carriage return and the newline. Note that we need to check that
|
||||
// we haven't already lexed a comment here because that falls through
|
||||
// into here as well.
|
||||
if (!lexed_comment) parser->current.end += le_len - 1 ; // skip CR
|
||||
case '\n': {
|
||||
size_t eol_length = match_eol_at(parser, parser->current.end - 1);
|
||||
if (eol_length) {
|
||||
// The only way you can have carriage returns in this
|
||||
// particular loop is if you have a carriage return
|
||||
// followed by a newline. In that case we'll just skip
|
||||
// over the carriage return and continue lexing, in
|
||||
// order to make it so that the newline token
|
||||
// encapsulates both the carriage return and the
|
||||
// newline. Note that we need to check that we haven't
|
||||
// already lexed a comment here because that falls
|
||||
// through into here as well.
|
||||
if (!lexed_comment) {
|
||||
parser->current.end += eol_length - 1; // skip CR
|
||||
}
|
||||
|
||||
if (parser->heredoc_end == NULL) {
|
||||
yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
||||
@ -5472,11 +5479,13 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
// If the lex state was ignored, or we hit a '.' or a '&.',
|
||||
// we will lex the ignored newline
|
||||
if (lex_state_ignored_p(parser) ||
|
||||
if (
|
||||
lex_state_ignored_p(parser) ||
|
||||
(following && (
|
||||
(peek_addr(parser, following) == '.') ||
|
||||
(peek_addr(parser, following) == '&' && peek_addr(parser, following + 1) == '.')
|
||||
))) {
|
||||
(peek_at(parser, following) == '.') ||
|
||||
(peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
|
||||
))
|
||||
) {
|
||||
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
||||
lexed_comment = false;
|
||||
goto lex_next_token;
|
||||
@ -5489,7 +5498,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// To match ripper, we need to emit an ignored newline even though
|
||||
// its a real newline in the case that we have a beginless range
|
||||
// on a subsequent line.
|
||||
if (peek_addr(parser, next_content + 1) == '.') {
|
||||
if (peek_at(parser, next_content + 1) == '.') {
|
||||
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
||||
lex_state_set(parser, YP_LEX_STATE_BEG);
|
||||
parser->command_start = true;
|
||||
@ -5507,7 +5516,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
// If we hit a &. after a newline, then we're in a call chain and
|
||||
// we need to return the call operator.
|
||||
if (peek_addr(parser, next_content) == '&' && peek_addr(parser, next_content + 1) == '.') {
|
||||
if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
|
||||
if (!lexed_comment) parser_lex_ignored_newline(parser);
|
||||
lex_state_set(parser, YP_LEX_STATE_DOT);
|
||||
parser->current.start = next_content;
|
||||
@ -5704,7 +5713,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
// = => =~ == === =begin
|
||||
case '=':
|
||||
if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_at(parser, 5))) {
|
||||
if (current_token_starts_line(parser) && strncmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) {
|
||||
yp_token_type_t type = lex_embdoc(parser);
|
||||
|
||||
if (type == YP_TOKEN_EOF) {
|
||||
@ -6209,9 +6218,9 @@ parser_lex(yp_parser_t *parser) {
|
||||
if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) {
|
||||
lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end));
|
||||
|
||||
size_t le_len = match_line_ending(parser);
|
||||
if (le_len) {
|
||||
parser->current.end += le_len;
|
||||
size_t eol_length = match_eol(parser);
|
||||
if (eol_length) {
|
||||
parser->current.end += eol_length;
|
||||
yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
||||
} else {
|
||||
parser->current.end++;
|
||||
@ -6370,7 +6379,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
((parser->current.end - parser->current.start) == 7) &&
|
||||
current_token_starts_line(parser) &&
|
||||
(strncmp(parser->current.start, "__END__", 7) == 0) &&
|
||||
(parser->current.end == parser->end || match_line_ending(parser))
|
||||
(parser->current.end == parser->end || match_eol(parser))
|
||||
)
|
||||
{
|
||||
parser->current.end = parser->end;
|
||||
@ -6429,7 +6438,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) {
|
||||
parser->current.end += whitespace;
|
||||
if (peek_at(parser, -1) == '\n') {
|
||||
if (peek_offset(parser, -1) == '\n') {
|
||||
// mutates next_start
|
||||
parser_flush_heredoc_end(parser);
|
||||
}
|
||||
@ -6697,9 +6706,9 @@ parser_lex(yp_parser_t *parser) {
|
||||
|
||||
// Otherwise we need to switch back to the parent lex mode and
|
||||
// return the end of the string.
|
||||
size_t le_len = match_line_ending_addr(parser, breakpoint);
|
||||
if (le_len) {
|
||||
parser->current.end = breakpoint + le_len;
|
||||
size_t eol_length = match_eol_at(parser, breakpoint);
|
||||
if (eol_length) {
|
||||
parser->current.end = breakpoint + eol_length;
|
||||
yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
||||
} else {
|
||||
parser->current.end = breakpoint + 1;
|
||||
@ -6708,7 +6717,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
if (
|
||||
parser->lex_modes.current->as.string.label_allowed &&
|
||||
(peek(parser) == ':') &&
|
||||
(peek_at(parser, 1) != ':')
|
||||
(peek_offset(parser, 1) != ':')
|
||||
) {
|
||||
parser->current.end++;
|
||||
lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED);
|
||||
@ -6812,9 +6821,9 @@ parser_lex(yp_parser_t *parser) {
|
||||
bool matched = true;
|
||||
bool at_end = false;
|
||||
|
||||
size_t le_len = match_line_ending_addr(parser, start + ident_length);
|
||||
if (le_len) {
|
||||
parser->current.end = start + ident_length + le_len;
|
||||
size_t eol_length = match_eol_at(parser, start + ident_length);
|
||||
if (eol_length) {
|
||||
parser->current.end = start + ident_length + eol_length;
|
||||
yp_newline_list_append(&parser->newline_list, parser->current.end - 1);
|
||||
} else if (parser->end == (start + ident_length)) {
|
||||
parser->current.end = start + ident_length;
|
||||
@ -6883,7 +6892,7 @@ parser_lex(yp_parser_t *parser) {
|
||||
// Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
|
||||
if (
|
||||
start + ident_length == parser->end ||
|
||||
match_line_ending_addr(parser, start + ident_length)
|
||||
match_eol_at(parser, start + ident_length)
|
||||
) {
|
||||
parser->current.end = breakpoint + 1;
|
||||
LEX(YP_TOKEN_STRING_CONTENT);
|
||||
@ -6902,9 +6911,9 @@ parser_lex(yp_parser_t *parser) {
|
||||
// stop looping before the newline and not after the
|
||||
// newline so that we can still potentially find the
|
||||
// terminator of the heredoc.
|
||||
size_t le_len = match_line_ending_addr(parser, breakpoint + 1);
|
||||
if (le_len) {
|
||||
breakpoint += le_len;
|
||||
size_t eol_length = match_eol_at(parser, breakpoint + 1);
|
||||
if (eol_length) {
|
||||
breakpoint += eol_length;
|
||||
} else {
|
||||
yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
|
||||
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||
@ -9189,10 +9198,11 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
|
||||
const char *cur_char = content_loc->start;
|
||||
|
||||
while (cur_char && cur_char < content_loc->end) {
|
||||
// Any empty newlines aren't included in the minimum whitespace calculation
|
||||
size_t le_len;
|
||||
while ((le_len = match_line_ending_addr(parser, cur_char))) {
|
||||
cur_char += le_len;
|
||||
// Any empty newlines aren't included in the minimum whitespace
|
||||
// calculation.
|
||||
size_t eol_length;
|
||||
while ((eol_length = match_eol_at(parser, cur_char))) {
|
||||
cur_char += eol_length;
|
||||
}
|
||||
|
||||
if (cur_char == content_loc->end) break;
|
||||
@ -9208,12 +9218,12 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
|
||||
cur_char++;
|
||||
}
|
||||
|
||||
// If we hit a newline, then we have encountered a line that contains
|
||||
// only whitespace, and it shouldn't be considered in the calculation of
|
||||
// common leading whitespace.
|
||||
le_len = match_line_ending_addr(parser, cur_char);
|
||||
if (le_len) {
|
||||
cur_char += le_len;
|
||||
// If we hit a newline, then we have encountered a line that
|
||||
// contains only whitespace, and it shouldn't be considered in
|
||||
// the calculation of common leading whitespace.
|
||||
eol_length = match_eol_at(parser, cur_char);
|
||||
if (eol_length) {
|
||||
cur_char += eol_length;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user